2020-12-07 23:11:38 +01:00
|
|
|
import re
|
2020-12-12 16:14:39 +01:00
|
|
|
from collections import defaultdict, Counter
|
2020-12-07 23:11:38 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def count_words_v1(sentence):
    """Count case-insensitive occurrences of whitespace-separated words.

    The sentence is split on runs of whitespace only, so punctuation that
    is attached to a word stays part of that word (``"hi,"`` and ``"hi"``
    are counted separately).

    Args:
        sentence: The text to analyse.

    Returns:
        A ``defaultdict(int)`` mapping each lower-cased word to the number
        of times it occurs.
    """
    tally = defaultdict(int)
    lowered_tokens = [piece.lower() for piece in sentence.split()]
    for token in lowered_tokens:
        tally[token] = tally[token] + 1
    return tally
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def count_words_v2(sentence):
    """Count case-insensitive occurrences of words found by a regex.

    A word is a maximal run of word characters (``\\w``) and apostrophes,
    so contractions such as ``don't`` stay in one piece while other
    punctuation acts as a separator.

    Args:
        sentence: The text to analyse.

    Returns:
        A ``defaultdict(int)`` mapping each lower-cased word to the number
        of times it occurs.
    """
    # NOTE: no "|" inside the brackets. Within a character class a pipe is
    # a literal character, not alternation, so including it would make
    # "one|two" count as a single word.
    regex = r"[\w']+"
    count = defaultdict(int)
    for word in re.findall(regex, sentence):
        count[word.lower()] += 1
    return count
|
|
|
|
|
|
|
|
|
|
|
2020-12-12 16:14:39 +01:00
|
|
|
def count_words_v3(sentence):
    """Count case-insensitive words using a boundary-anchored pattern.

    This variant uses the regex the original docstring credits to trey:
    a word is a run of word characters, apostrophes and hyphens that
    starts and ends on a word boundary. Contractions (``it's``) and
    hyphenated words (``well-known``) are kept whole, while surrounding
    quotes or dashes are dropped.

    Args:
        sentence: The text to analyse.

    Returns:
        A ``defaultdict(int)`` mapping each lower-cased word to the number
        of times it occurs.
    """
    word_pattern = r"\b[\w'-]+\b"
    tally = defaultdict(int)
    for match in re.finditer(word_pattern, sentence):
        key = match.group(0).lower()
        tally[key] = tally[key] + 1
    return tally
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def count_words_v4(sentence):
    """Count words in the lower-cased sentence and return a ``Counter``.

    The whole sentence is lower-cased first, then every run of word
    characters, apostrophes and hyphens that starts and ends on a word
    boundary is extracted and tallied.

    Args:
        sentence: The text to analyse.

    Returns:
        A ``collections.Counter`` mapping each word to the number of times
        it occurs.
    """
    word_pattern = r"\b[\w'-]+\b"
    lowered = sentence.lower()
    words = re.findall(word_pattern, lowered)
    return Counter(words)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Default entry point: `count_words` is an alias for the v4 implementation.
count_words = count_words_v4
|