#%% """ Credits: This code is adapted from the textbook "Deep Learning with Python", 2nd Edition, by François Chollet. """ #%% import numpy as np import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers from tensorflow.keras.layers import TextVectorization from transformers_common import * from tv_to_file import * #%% Load training, validation, and test set for the Large Movie Review Dataset. # Each of these sets will be a BatchDataset object. batch_size = 32 train_ds = keras.utils.text_dataset_from_directory( "../../../../../home/cse4392_data/20_text/aclImdb/train", batch_size=batch_size) val_ds = keras.utils.text_dataset_from_directory( "../../../../../home/cse4392_data/20_text/aclImdb/val", batch_size=batch_size) test_ds = keras.utils.text_dataset_from_directory( "../../../../../home/cse4392_data/20_text/aclImdb/test", batch_size=batch_size) #%% Create text vectorization layer, save it to file, and use it # to vectorize the dataset. max_tokens = 20000 max_length = 600 text_vectorization = TextVectorization(max_tokens=max_tokens, output_mode="int", output_sequence_length=max_length,) text_only_train_ds = train_ds.map(lambda x, y: x) # replacing call to adapt() with my code, to allow saving the text # vectorization layer to a file. #text_vectorization.adapt(text_only_train_ds) tv_file = "transformer_encoder" set_tv_vocabulary(text_vectorization, text_only_train_ds, tv_file) int_train_ds = train_ds.map(lambda x, y: (text_vectorization(x), y)) int_val_ds = val_ds.map(lambda x, y: (text_vectorization(x), y)) int_test_ds = test_ds.map(lambda x, y: (text_vectorization(x), y)) #%% Define the Transformer model. This version does NOT use positional # embeddings. vocab_size = 20000 embed_dim = 256 num_heads = 2 dense_dim = 32 inputs = keras.Input(shape=(None,), dtype="int64") x = layers.Embedding(vocab_size, embed_dim)(inputs) x = TransformerEncoder(embed_dim, dense_dim, num_heads)(x) x = layers.GlobalMaxPooling1D()(x) x = layers.Dropout(0.5)(x) outputs = layers.Dense(1, activation="sigmoid")(x) model = keras.Model(inputs, outputs) model.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"]) model.summary() #%% Train the model callbacks = [keras.callbacks.ModelCheckpoint("transformer_encoder.keras", save_best_only=True)] model.fit(int_train_ds, validation_data=int_val_ds, epochs=20, callbacks=callbacks) #%% Define the Transformer model. This version uses positional # embeddings. vocab_size = 20000 sequence_length = 600 embed_dim = 256 num_heads = 2 dense_dim = 32 inputs = keras.Input(shape=(None,), dtype="int64") x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(inputs) x = TransformerEncoder(embed_dim, dense_dim, num_heads)(x) x = layers.GlobalMaxPooling1D()(x) x = layers.Dropout(0.5)(x) outputs = layers.Dense(1, activation="sigmoid")(x) model = keras.Model(inputs, outputs) model.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"]) model.summary() callbacks = [keras.callbacks.ModelCheckpoint("full_transformer_encoder.keras", save_best_only=True)] #%% Train the model. model.fit(int_train_ds, validation_data=int_val_ds, epochs=20, callbacks=callbacks) # takes about 10.5 minutes per epoch, 3.5 hours for 20 epochs. #%%