tiyd-python-2015-05 · Callus4815 · May 7, 2015 · Sep 8, 2015 · Sep 8, 2015
diff --git a/word-counter.py b/word-counter.py
@@ -0,0 +1,43 @@
+import re
+
+
+def get_text():
+    with open('sample.txt') as file:#imports file(and closes file when done with it??)
+
+        file = file.read()
+        file = re.sub(r'[^A-Za-z\s]','',file)#strips characters
+        file = re.sub(r'[\s]+',' ', file)#strips whitespace
+        file = file.lower()#lowercases
+        a_list = file.split()#turns file to a list of strings
+    return a_list
+def count_text(a_list):
+    wordcount={}#my empty list to plug into
+    for word in a_list:
+        if word not in wordcount:
+            wordcount[word] = 1
+        else:                   #Counted the occurences of all words and added them to list(wordcount)
+            wordcount[word] += 1
+    return wordcount
+
+def frequency(wordcount):
+    a_dict = {}
+    nums = sorted(wordcount.items(),key = lambda x:x[1], reverse=True)
+    sorts_dic = nums[0:20]
+    for item in sorts_dic:
+        a_dict.update({item[0] : item[1]});
+    return a_dict
+def print_results(a_dict):
+    an_dict = sorted(a_dict.items(),key=lambda x : x[1],reverse=True)
+    for items in an_dict:
+        print(items[0]+" "+str(items[1]))
+
+
+
+def main():
+    a_list = get_text()
+    wordcount =  count_text(a_list)
+    a_dict = frequency(wordcount)
+    print_results(a_dict)
+
+
+main()
diff --git a/word_counter.py b/word_counter.py
@@ -0,0 +1,66 @@
+import re
+
+
+def get_text():
+    with open('sample.txt') as file:  #imports file(and closes file when done with it??)
+        file = file.read()
+        file = re.sub(r'[^A-Za-z0-9]',' ',file)#strips characters
+        file = re.sub(r'[\s]+  ','', file)#strips whitespace
+        file = file.lower()#lowercases
+        a_list = file.split()#turns file to a list of strings
+    return a_list
+
+def ignore_words(file):
+    words=[]
+    all_words = get_text()
+    common_words = ['a', 'able', 'about', 'across', 'after', 'all', 'almost',
+     'also', 'am', 'among', 'an', 'and', 'any', 'are', 'as', 'at', 'be',
+     'because', 'been', 'but', 'by', 'can', 'cannot', 'could', 'dear', 'did',
+     'do', 'does', 'either', 'else', 'ever', 'every', 'for', 'from', 'get',
+      'got', 'had', 'has', 'have', 'he', 'her', 'hers', 'him', 'his', 'how',
+      'however', 'i', 'if', 'in', 'into', 'is', 'it', 'its', 'just',
+      'least', 'let', 'like', 'likely', 'may', 'me', 'might', 'most', 'must',
+      'my', 'neither', 'no', 'nor', 'not', 'of', 'off', 'often', 'on',
+      'only', 'or', 'other', 'our', 'own', 'rather', 'said', 'say', 'says',
+      'she', 'should', 'since', 'so', 'some', 'than', 'that', 'the',
+      'their', 'them', 'then', 'there', 'these', 'they', 'this', 'tis', 'to',
+      'too', 'twas', 'us', 'wants', 'was', 'we', 'were', 'what', 'when',
+      'where', 'which', 'while', 'who', 'whom', 'why', 'will', 'with',
+      'would', 'yet', 'you', 'your', 's']
+    for word in all_words:
+        if word not in common_words:
+            words.append(word)
+    return words
+
+def count_text(a_list):
+    wordcount={}#my empty dictionary to plug into
+    for word in a_list:
+        if word not in wordcount:
+            wordcount[word] = 1
+        else:                   #Counted the occurences of all words and added them to list(wordcount)
+            wordcount[word] += 1
+    return wordcount
+
+def top_20(a_dict):
+    a_dict = dict(a_dict)
+    top_list = sorted(a_dict.items(), key=lambda x: x[1], reverse=True)
+    top20 = top_list[:20]
+    return top20
+
+def normalize(a_list, normalize_to=50):
+    divisor = a_list[0][1] / normalize_to
+    return divisor
+
+def tables(a_list):
+    for index in a_list:
+        print(index[0].ljust(10),"|".center(1),
+        ("#"*(int(index[1])//normalize(a_list))).rjust(1))
+
+def word_frequency():
+    clean = get_text()
+    not_ignored = ignore_words(clean)
+    counter = count_text(not_ignored)
+    freq = top_20(counter)
+    tables(freq)
+
+word_frequency()