import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
# remove_items() and select_classes() come from utilities_300; a reference
# sketch of what they are assumed to do appears at the end of this script.
from utilities_300 import *

# load and preprocess dataset
dataset = keras.datasets.mnist
#dataset = keras.datasets.fashion_mnist

# the data, split between train and test sets
(train_inputs, train_labels), (test_inputs, test_labels) = dataset.load_data()
number_of_classes = np.max([np.max(train_labels), np.max(test_labels)]) + 1
input_shape = train_inputs[0].shape

# Scale images to the [0, 1] range
train_inputs = train_inputs.astype("float32") / 255
test_inputs = test_inputs.astype("float32") / 255

# Right now, train_inputs[i] (for any i) is a 28x28 2D array.
# The Conv2D layer requires each input to be a 3D array.
# We use the np.expand_dims function to convert each training input to
# a 3D array of shape (28, 28, 1), to make it compatible with the Conv2D layer.
train_inputs = np.expand_dims(train_inputs, -1)
test_inputs = np.expand_dims(test_inputs, -1)
print("train_inputs shape:", train_inputs.shape)
print(train_inputs.shape[0], "train samples")
print(test_inputs.shape[0], "test samples")

#%% Create "big" dataset and "small" dataset
original_train_inputs = train_inputs
original_train_labels = train_labels
original_test_inputs = test_inputs
original_test_labels = test_labels

# compile list of classes. For the MNIST dataset, this will give us a
# list of 10 class labels, from 0 to 9.
original_classes = list(np.union1d(np.unique(train_labels), np.unique(test_labels)))

# We will exclude all samples of "small classes" from the big training dataset.
#small_classes = [2,3]
small_classes = [1,3,5,7,9]
big_classes = remove_items(original_classes, small_classes)
num_big_classes = len(big_classes)
small_num_classes = len(small_classes)

(big_train_inputs, big_train_labels) = select_classes(original_train_inputs,
                                                      original_train_labels,
                                                      big_classes)
(big_test_inputs, big_test_labels) = select_classes(original_test_inputs,
                                                    original_test_labels,
                                                    big_classes)
(small_train_inputs, small_train_labels) = select_classes(original_train_inputs,
                                                          original_train_labels,
                                                          small_classes)
(small_test_inputs, small_test_labels) = select_classes(original_test_inputs,
                                                        original_test_labels,
                                                        small_classes)

#%%
# Train on big dataset, where all samples of "small classes" have been removed.
input_shape = train_inputs[0].shape

# create the model
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_big_classes, activation="softmax"),
    ]
)
model.summary()

batch_size = 128
epochs = 15
model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
              optimizer="adam",
              metrics=["accuracy"])

# train the model on the "big classes" dataset
model.fit(big_train_inputs, big_train_labels, epochs=epochs, batch_size=batch_size)
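# (Added check, not in the original script.) SparseCategoricalCrossentropy with a
# Dense(num_big_classes) output layer expects integer labels in [0, num_big_classes).
# Since big_classes = [0, 2, 4, 6, 8], training above only works if select_classes()
# remaps the kept labels to consecutive indices 0..k-1 -- an assumption about
# utilities_300, which is not shown here. Printing the remaining label values makes
# that assumption easy to verify.
print("big train label values:", np.unique(big_train_labels))
print("small train label values:", np.unique(small_train_labels))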
# test the model on the "big classes" test set.
test_loss, test_acc = model.evaluate(big_test_inputs, big_test_labels, verbose=2)
print('\nTest accuracy: %.2f' % (test_acc * 100))
# uncomment the line below to save the trained model; the transfer-learning cells
# further down load this file.
#model.save('mnist_5_15.h5')

#%% This part is just for sanity checking, to make sure that the accuracy we
# get using model.predict() is the same as the accuracy we get with
# model.evaluate()
predictions = model.predict(big_test_inputs)
classes = np.argmax(predictions, 1)
test_accuracy = np.mean(classes == big_test_labels)
print('\nTest accuracy: %.2f' % (test_accuracy * 100))

#%% Here we do transfer learning.
# This is the implementation that uses the Sequential API:
# - load the model that was trained on the "big" dataset
# - remove the previous output layer
# - add a new output layer
# - freeze all weights except the ones in the new output layer
# - train on small dataset

# load the model that was trained on the "big" dataset
model = keras.models.load_model('mnist_5_15.h5')
num_layers = len(model.layers)
input_shape = small_train_inputs[0].shape

# This is where we use the Sequential API to create the model.
# Notice that we create a list of layers, where we use all layers of the
# model except for the last one, and we add a new fully connected
# output layer.
refined_model = keras.Sequential([keras.Input(shape=input_shape)] +
                                 model.layers[0:num_layers-1] +
                                 [layers.Dense(small_num_classes, activation="softmax")])

# We freeze the weights of all layers except the ones in the (new) output layer.
num_layers = len(refined_model.layers)
for i in range(0, num_layers-1):
    refined_model.layers[i].trainable = False

refined_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                      optimizer="adam",
                      metrics=["accuracy"])
refined_model.summary()

# train the model (essentially, train the weights of the last layer) on the
# "small" dataset.
train_size = 100  # this specifies the size of the "small" training set
epochs = 100
batch_size = 4
hist_mnist_tl = refined_model.fit(small_train_inputs[0:train_size],
                                  small_train_labels[0:train_size],
                                  epochs=epochs, batch_size=batch_size,
                                  # validation_data=(small_test_inputs, small_test_labels)
                                  )

test_loss, test_acc = refined_model.evaluate(small_test_inputs, small_test_labels, verbose=2)
print('\nTest accuracy: %.2f' % (test_acc * 100))
#refined_model.save('mnist_refined5_tsize100_epochs100.h5')

#%% Compare to training on small dataset
input_shape = small_train_inputs[0].shape
train_size = 100  # this specifies the size of the "small" training set

small_model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(small_num_classes, activation="softmax"),
    ]
)
small_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                    optimizer="adam",
                    metrics=["accuracy"])
small_model.summary()

small_model.fit(small_train_inputs[0:train_size], small_train_labels[0:train_size],
                epochs=100, batch_size=4)

test_loss, test_acc = small_model.evaluate(small_test_inputs, small_test_labels, verbose=2)
print('\nTest accuracy: %.2f%%' % (test_acc * 100))
#small_model.save('fashion_mnist_small5_tsize20_epochs100.h5')
# results: 84-87% in five tries.
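#%% (Added sketch, not in the original script.) Plot the transfer-learning training
# curve captured in hist_mnist_tl by the Sequential-API transfer-learning cell above,
# using the matplotlib import from the top of the script. 'loss' and 'accuracy' are
# the history keys that model.fit() records when the model is compiled with
# metrics=["accuracy"].
plt.figure()
plt.plot(hist_mnist_tl.history['loss'], label='training loss')
plt.plot(hist_mnist_tl.history['accuracy'], label='training accuracy')
plt.xlabel('epoch')
plt.legend()
plt.title('Transfer learning (Sequential API), train_size = 100')
plt.show()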
#%% This is an alternative implementation of transfer learning that
# uses the Functional API instead of the Sequential API. We still
# follow these steps:
# - load the model that was trained on the "big" dataset
# - remove the previous output layer
# - add a new output layer
# - freeze all weights except the ones in the new output layer
# - train on small dataset

# load the model that was trained on the "big" dataset
model = keras.models.load_model('mnist_5_15.h5')
num_layers = len(model.layers)
# base_output is the output of the second-to-last layer of the loaded model,
# i.e. the Dropout layer that feeds the old classification head.
base_output = model.layers[num_layers-2].output
small_num_classes = len(small_classes)

# This is where we use the Functional API to create the model.
new_output = layers.Dense(small_num_classes, activation="softmax")(base_output)
refined_model = keras.models.Model(inputs=model.inputs, outputs=new_output)

# We freeze the weights of all layers except the ones in the (new) output layer.
# Note: in the Functional model, refined_model.layers[0] is the InputLayer, so this
# range also leaves the Dropout layer trainable; that is harmless, because neither
# the InputLayer nor Dropout has any weights.
for i in range(0, num_layers-1):
    refined_model.layers[i].trainable = False

refined_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                      optimizer="adam",
                      metrics=["accuracy"])
refined_model.summary()

# train the model (essentially, train the weights of the last layer) on the
# "small" dataset.
train_size = 100  # this specifies the size of the "small" training set
epochs = 100
batch_size = 4
refined_model.fit(small_train_inputs[0:train_size],
                  small_train_labels[0:train_size],
                  epochs=epochs, batch_size=batch_size)

test_loss, test_acc = refined_model.evaluate(small_test_inputs, small_test_labels, verbose=0)
print('\nTest accuracy: %.2f' % (test_acc * 100))
#refined_model.save('mnist_refined5_tsize20_epochs100.h5')
# results: 90.9-91.9% in ten trials

#%%
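#%% Reference sketch (added; NOT the actual utilities_300 code, which is not shown
# in this script). These are hypothetical implementations of remove_items() and
# select_classes(), written to match how they are used above. In particular,
# select_classes() is assumed to remap the selected labels to consecutive integers
# 0..k-1, because the models above use Dense(len(classes)) output layers with
# SparseCategoricalCrossentropy, which expects labels in that range.

def remove_items(items, items_to_remove):
    # Return a copy of items with every element of items_to_remove removed.
    return [x for x in items if x not in items_to_remove]

def select_classes(inputs, labels, classes):
    # Keep only the samples whose label is in classes, and remap each kept label
    # to its index in classes (0..len(classes)-1).
    mask = np.isin(labels, classes)
    selected_inputs = inputs[mask]
    remap = {c: i for i, c in enumerate(classes)}
    selected_labels = np.array([remap[y] for y in labels[mask]])
    return selected_inputs, selected_labels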