""" Counting unique words, and the frequencies of words, from the text stored in a specific file. Improvements from word_count_take_2: - defined a separate function process_line to preprocess each line of text and extract words. This new function throws out punctuation symbols. Remaining problems: - We want to sort results by frequency. """ import os """ function count_words: argument: a filename return value: a dictionary, mapping words to frequencies. description: this function builds a dictionary such that dictionary[word] is the number of times the word occurs in the filename. """ def count_words(filename): if (os.path.isfile(filename) == False): print("\nError: file " + filename + " does not exist.\n") return in_file = open(filename, "r") # initialize the dictionary to empty result = {} for line in in_file: words = process_line(line) for word in words: if (word in result): result[word] += 1 else: result[word] = 1 return result """ function process_line: argument: a line of text return value: a list of words in that line, ignoring case and punctuation. Words appearing multiple times in the line will be included multiple lines in the output. Note: the dash character '-' is replaced by space, as dashes separate individual words from each other. """ def process_line(line): line = line.lower() new_line = "" for letter in line: # note: since we want to include double quotes and single quotes # in the list of characters to ignore, we must use triple quotes # here. if letter in """,.!"'()""": continue elif letter == '-': letter = ' ' new_line = new_line + letter words = new_line.split() return words """ function print_word_frequencies: argument: a dictionary, mapping words to frequencies return value: nothing is returned. description: this function prints the contents of the dictionary in a way that is easy to read. """ def print_word_frequencies(dictionary): print() for word in dictionary: frequency = dictionary[word] print(word + ":", frequency) print() print(len(dictionary), 'words found\n') def main(): filename = "file1.txt" dictionary = count_words(filename) print_word_frequencies(dictionary) main()