#%%

"""
Credits: This code is adapted from the textbook "Deep Learning with Python", 
2nd Edition, by François Chollet. 
"""

#%%

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import TextVectorization

from transformers_common import *
from tv_to_file import *

#%% Load training, validation, and test set for the Large Movie Review Dataset.
#   Each of these sets will be a BatchDataset object.

batch_size = 32

# Build one batched dataset per split directory, in train / val / test order.
# Every split is read with the same batch size.
train_ds, val_ds, test_ds = [
    keras.utils.text_dataset_from_directory(split_dir, batch_size=batch_size)
    for split_dir in (
        "../../../../../home/cse4392_data/20_text/aclImdb/train",
        "../../../../../home/cse4392_data/20_text/aclImdb/val",
        "../../../../../home/cse4392_data/20_text/aclImdb/test",
    )
]

#%% Load the text vectorization layer (to ensure that it matches the
#   version that was used in training), and vectorize the dataset.

tv_file = "model_backup/second_run_reviews/transformer_encoder"
text_vectorization = load_text_vectorization(tv_file)


def _vectorize_batch(texts, labels):
    """Apply the loaded text vectorization layer to the texts of one batch.

    The labels are passed through unchanged, so each dataset element stays
    a (inputs, labels) pair.
    """
    return text_vectorization(texts), labels


# The same transformation is applied to all three splits.
int_train_ds = train_ds.map(_vectorize_batch)
int_val_ds = val_ds.map(_vectorize_batch)
int_test_ds = test_ds.map(_vectorize_batch)

#%% Load the pre-trained model that does NOT use positional embeddings.

# The saved model contains a custom layer class, which has to be handed to
# the loader by name so that it can be reconstructed.
custom_layers = {"TransformerEncoder": TransformerEncoder}
model = keras.models.load_model(
    "model_backup/second_run_reviews/transformer_encoder.keras",
    custom_objects=custom_layers,
)

# Entry 1 of the evaluate() result is reported as the test accuracy
# (presumably entry 0 is the loss -- confirm against how the model was compiled).
test_metrics = model.evaluate(int_test_ds)
print(f"Test acc: {test_metrics[1]:.3f}")

# The resulting test accuracy should be 
# 88.32% for model trained in first run
# 86.49% for model trained in second run

#%% Load the pre-trained model that DOES use positional embeddings.

# This saved model contains two custom layer classes; both have to be handed
# to the loader by name so that they can be reconstructed.
custom_layers = {
    "TransformerEncoder": TransformerEncoder,
    "PositionalEmbedding": PositionalEmbedding,
}
model = keras.models.load_model(
    "model_backup/second_run_reviews/full_transformer_encoder.keras",
    custom_objects=custom_layers,
)

# Entry 1 of the evaluate() result is reported as the test accuracy
# (presumably entry 0 is the loss -- confirm against how the model was compiled).
test_metrics = model.evaluate(int_test_ds)
print(f"Test acc: {test_metrics[1]:.3f}")

# The resulting test accuracy should be 
# 88.13% for model trained in first run
# 87.54% for model trained in second run

#%%