#%% """ Credits: This code is adapted from the textbook "Deep Learning with Python", 2nd Edition, by François Chollet. """ #%% import numpy as np import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers from tensorflow.keras.layers import TextVectorization import random import string import re from transformers_common import * from tv_to_file import * #%% We are using a fixed split of the dataset into training, test and # validation, to make sure that we can later duplicate the exact same # text vectorization layers that we used at training. train_pairs = load_pairs("model_backup/eng_spa_train.txt") val_pairs = load_pairs("model_backup/eng_spa_val.txt") test_pairs = load_pairs("model_backup/eng_spa_test.txt") #%% Create text vectorization layers for English text and for Spanish text. strip_chars = string.punctuation + "¿" strip_chars = strip_chars.replace("[", "") strip_chars = strip_chars.replace("]", "") def custom_standardization(input_string): lowercase = tf.strings.lower(input_string) return tf.strings.regex_replace( lowercase, f"[{re.escape(strip_chars)}]", "") vocab_size = 15000 sequence_length = 20 source_vectorization = layers.TextVectorization(max_tokens=vocab_size, output_mode="int", output_sequence_length=sequence_length,) target_vectorization = layers.TextVectorization(max_tokens=vocab_size, output_mode="int", output_sequence_length=sequence_length + 1, standardize=custom_standardization,) train_english_texts = [pair[0] for pair in train_pairs] train_spanish_texts = [pair[1] for pair in train_pairs] source_vectorization.adapt(train_english_texts) target_vectorization.adapt(train_spanish_texts) #%% Load the pre-trained transformer model. #filename = "model_backup/eng_spa_transformer_best.keras" filename = "model_backup/eng_spa_transformer_final1.keras" transformer = keras.models.load_model( filename, custom_objects={"TransformerEncoder": TransformerEncoder, "PositionalEmbedding": PositionalEmbedding, "TransformerDecoder": TransformerDecoder,}) spa_vocab = target_vectorization.get_vocabulary() spa_index_lookup = dict(zip(range(len(spa_vocab)), spa_vocab)) test_eng_texts = [pair[0] for pair in test_pairs] test_spa_texts = [pair[1] for pair in test_pairs] #%% Use the model to translate a random sentence from the test set. index = random.randint(0, len(test_eng_texts)) input_sentence = test_eng_texts[index] target= test_spa_texts[index] result = decode_sequence(transformer, input_sentence, source_vectorization, target_vectorization, spa_index_lookup) print("Input: \"%s\"" % (input_sentence)) print("Result: \"%s\"" % (result)) print("Target: \"%s\"" % (target)) #%% Use the model to translate some input text that we specify. input_text = "I did not like this movie at all" #input_text = "if it rains I won't go shopping" print(input_text) print(decode_sequence(transformer, input_text, source_vectorization, target_vectorization, spa_index_lookup))