"""
Counting unique words, and the frequencies of words,
from the text stored in a specific file.

Improvements from word_count_take_2:
- defined a separate function process_line to preprocess each line of
text and extract words. This new function throws out punctuation symbols.

Remaining problems:
- We want to sort results by frequency.
"""

import os


"""
function count_words:
    argument: a filename
    return value: a dictionary, mapping words to frequencies.
    description: this function builds a dictionary such that
        dictionary[word] is the number of times the word occurs in
        the file named by filename.
"""
def count_words(filename):
    """Count how often each word occurs in a text file.

    argument: filename, the path of the text file to read.
    return value: a dictionary mapping each word (as extracted by
        process_line) to the number of times it occurs in the file.
        If filename is not an existing regular file, an error message
        is printed and None is returned instead.
    """
    if not os.path.isfile(filename):
        print("\nError: file " + filename + " does not exist.\n")
        return None

    # initialize the dictionary to empty
    result = {}

    # the with-statement guarantees the file is closed, even if
    # processing one of the lines raises an exception
    with open(filename, "r") as in_file:
        for line in in_file:
            for word in process_line(line):
                # a word seen for the first time starts from a count of 0
                result[word] = result.get(word, 0) + 1

    return result

"""
function process_line:
    argument: a line of text
    return value: a list of words in that line, ignoring case and punctuation.
        Words appearing multiple times in the line will be included multiple
        times in the output.

    Note: the dash character '-' is replaced by space, as dashes separate
    individual words from each other.
"""
def process_line(line):
    """Split a line of text into lowercase words with punctuation removed.

    argument: line, a string holding one line of text.
    return value: a list of the words in the line, lowercased and in order
        of appearance. A word that occurs several times in the line
        appears several times in the list.

    The comma, period, exclamation mark, double quote, single quote and
    both parentheses are deleted. Each dash is turned into a space, so
    dash-joined words are split apart. Every other character is kept.
    """
    # A single translation table does both jobs in one pass: it maps '-'
    # to a space and deletes the punctuation characters listed in the
    # third argument. Triple quotes are needed because that list contains
    # both a double quote and a single quote.
    table = str.maketrans("-", " ", """,.!"'()""")
    return line.lower().translate(table).split()

"""
function print_word_frequencies:
    argument: a dictionary, mapping words to frequencies
    return value: nothing is returned.
    description: this function prints the contents of the dictionary in
        a way that is easy to read.
"""
def print_word_frequencies(dictionary):
    """Print every word with its frequency, then a summary line.

    argument: dictionary, a mapping from words to frequencies.
    return value: nothing is returned.

    The output is a blank line, one "word: frequency" line per entry in
    the dictionary's iteration order, another blank line, and finally the
    number of entries in the dictionary.
    """
    print()
    for key in dictionary:
        count = dictionary[key]
        print(key + ": " + str(count))

    print()
    entry_total = len(dictionary)
    print(entry_total, 'words found\n')

def main(filename="file1.txt"):
    """Count the words in a file and print their frequencies.

    argument: filename, the path of the text file to analyze; defaults
        to "file1.txt".
    return value: nothing is returned.
    """
    dictionary = count_words(filename)
    if dictionary is None:
        # count_words has already printed an error message for a missing
        # file; there is nothing to print, so stop here.
        return
    print_word_frequencies(dictionary)

# NOTE: this call is unconditional, so the word count runs (and prints) both
# when this file is executed as a script and whenever it is imported.
main()