""" Counting unique words, and the frequencies of words, from the text stored in a specific file. Improvements from word_count_take_3: - We change print_word_frequencies so that the results are printed sorted by frequency. To do this, we: 1. define a helper function called inverse_dictionary. 2. we modify print_word_frequenciecs to use that helper function. """ import os """ function count_words: argument: a filename return value: a dictionary, mapping words to frequencies. description: this function builds a dictionary such that dictionary[word] is the number of times the word occurs in the filename. """ def count_words(filename): if (os.path.isfile(filename) == False): print("\nError: file " + filename + " does not exist.\n") return in_file = open(filename, "r") # initialize the dictionary to empty result = {} for line in in_file: words = process_line(line) for word in words: if (word in result): result[word] += 1 else: result[word] = 1 return result """ function process_line: argument: a line of text return value: a list of words in that line, ignoring case and punctuation. Words appearing multiple times in the line will be included multiple lines in the output. Note: the dash character '-' is replaced by space, as dashes separate individual words from each other. """ def process_line(line): line = line.lower() new_line = "" for letter in line: # note: since we want to include double quotes and single quotes # in the list of characters to ignore, we must use triple quotes # here. if letter in """,.!"'()""": continue elif letter == '-': letter = ' ' new_line = new_line + letter words = new_line.split() return words """ function print_word_frequencies: argument: a dictionary called in_dictionary return value: an inverse dictionary out_dictionary, such that out_dictionary[value] is the list of all keys in in_dictionary that contain that value. description: this function prints the contents of the dictionary in a way that is easy to read. """ def inverse_dictionary(in_dictionary): out_dictionary = {} for key in in_dictionary: value = in_dictionary[key] if (value in out_dictionary): list_of_keys = out_dictionary[value] list_of_keys.append(key) else: # note that we store a list containing the key, not just # the key itself. out_dictionary[value] = [key] return out_dictionary """ function print_word_frequencies: argument: a dictionary, mapping words to frequencies return value: nothing is returned. description: this function prints the contents of the dictionary in a way that is easy to read. """ def print_word_frequencies(dictionary): print() inverse = inverse_dictionary(dictionary) frequencies = inverse.keys() # convert frequencies to a list, so that we can sort it. frequencies = list(frequencies) frequencies.sort() # the sort() method sorts in ascending order. We want descending order, # so we reverse the list. frequencies.reverse() for frequency in frequencies: list_of_words = inverse[frequency] # for words with the same frequency, we want them sorted in # alphabetical order, so we sort them. list_of_words.sort() for word in list_of_words: print(word + ":", frequency) print() print(len(dictionary), 'words found\n') def main(): filename = "file1.txt" dictionary = count_words(filename) print_word_frequencies(dictionary) main()