#%%
"""
Credits: This code is adapted from the textbook "Deep Learning with Python",
2nd Edition, by François Chollet.
"""

#%%
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import TextVectorization

from transformers_common import *
from tv_to_file import *

#%% Load training, validation, and test set for the Large Movie Review Dataset.
# Each of these sets will be a BatchDataset object.

batch_size = 32


def _load_split(split_dir):
    """Build a batched text dataset from `split_dir` using `batch_size`."""
    return keras.utils.text_dataset_from_directory(
        split_dir,
        batch_size=batch_size,
    )


train_ds = _load_split(
    "../../../../../home/cse4392_data/20_text/aclImdb/train")
val_ds = _load_split(
    "../../../../../home/cse4392_data/20_text/aclImdb/val")
test_ds = _load_split(
    "../../../../../home/cse4392_data/20_text/aclImdb/test")

#%% Load the text vectorization layer (to ensure that it matches the
# version that was used in training), and vectorize the dataset.

tv_file = "model_backup/second_run_reviews/transformer_encoder"
text_vectorization = load_text_vectorization(tv_file)


def _vectorize(texts, labels):
    """Vectorize the text component; pass the labels through unchanged."""
    return text_vectorization(texts), labels


int_train_ds = train_ds.map(_vectorize)
int_val_ds = val_ds.map(_vectorize)
int_test_ds = test_ds.map(_vectorize)

#%% Load the pre-trained model that does NOT use positional embeddings.

# The saved model contains a custom layer, so the class must be supplied
# to Keras for deserialization.
encoder_custom_objects = {"TransformerEncoder": TransformerEncoder}
model = keras.models.load_model(
    "model_backup/second_run_reviews/transformer_encoder.keras",
    custom_objects=encoder_custom_objects,
)

# Index 1 of the evaluation result is reported as the test accuracy.
encoder_test_metrics = model.evaluate(int_test_ds)
print(f"Test acc: {encoder_test_metrics[1]:.3f}")

# The resulting test accuracy should be
# 88.32% for model trained in first run
# 86.49% for model trained in second run

#%% Load the pre-trained model that DOES use positional embeddings.
# Both custom layer classes are needed to deserialize this saved model.
full_encoder_custom_objects = {
    "TransformerEncoder": TransformerEncoder,
    "PositionalEmbedding": PositionalEmbedding,
}
model = keras.models.load_model(
    "model_backup/second_run_reviews/full_transformer_encoder.keras",
    custom_objects=full_encoder_custom_objects,
)

# Index 1 of the evaluation result is reported as the test accuracy.
full_encoder_test_metrics = model.evaluate(int_test_ds)
print(f"Test acc: {full_encoder_test_metrics[1]:.3f}")

# The resulting test accuracy should be
# 88.13% for model trained in first run
# 87.54% for model trained in second run

#%%