#%%
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from utilities_300 import *

dataset = keras.datasets.mnist
#dataset = keras.datasets.fashion_mnist

# The data, split between train and test sets.
(train_inputs, train_labels), (test_inputs, test_labels) = dataset.load_data()

number_of_classes = np.max([np.max(train_labels), np.max(test_labels)]) + 1
input_shape = train_inputs[0].shape

# Scale images to the [0, 1] range.
train_inputs = train_inputs.astype("float32") / 255
test_inputs = test_inputs.astype("float32") / 255

# Right now, train_inputs[i] (for any i) is a 28x28 2D array.
# The Conv2D layer requires each input to be a 3D array.
# We use np.expand_dims to convert each input to a 3D array of shape
# (28, 28, 1), to make it compatible with the Conv2D layer.
train_inputs = np.expand_dims(train_inputs, -1)
test_inputs = np.expand_dims(test_inputs, -1)
print("train_inputs shape:", train_inputs.shape)
print(train_inputs.shape[0], "train samples")
print(test_inputs.shape[0], "test samples")

#%% Create "big" dataset and "small" dataset
original_train_inputs = train_inputs
original_train_labels = train_labels
original_test_inputs = test_inputs
original_test_labels = test_labels

# Compile the list of classes. For the MNIST dataset, this gives us a
# list of 10 class labels, from 0 to 9.
original_classes = list(np.union1d(np.unique(train_labels), np.unique(test_labels)))

# We will exclude all samples of the "small classes" from the big training dataset.
#small_classes = [2,3]
small_classes = [1, 3, 5, 7, 9]
big_classes = remove_items(original_classes, small_classes)

(big_train_inputs, big_train_labels) = select_classes(original_train_inputs, original_train_labels, big_classes)
(big_test_inputs, big_test_labels) = select_classes(original_test_inputs, original_test_labels, big_classes)
(small_train_inputs, small_train_labels) = select_classes(original_train_inputs, original_train_labels, small_classes)
(small_test_inputs, small_test_labels) = select_classes(original_test_inputs, original_test_labels, small_classes)

#%% Convert the "small" dataset to the input format that VGG16 expects:
# 3-channel images of size at least 32x32. We replicate the grayscale
# channel three times and zero-pad each 28x28 image to 32x32.
temp = small_train_inputs
(num, _, _, _) = temp.shape
temp = np.repeat(temp, 3, axis=3)
small_train_inputs = np.zeros((num, 32, 32, 3))
small_train_inputs[:, 2:30, 2:30, :] = temp

temp = small_test_inputs
(num, _, _, _) = temp.shape
temp = np.repeat(temp, 3, axis=3)
small_test_inputs = np.zeros((num, 32, 32, 3))
small_test_inputs[:, 2:30, 2:30, :] = temp

#%%
input_shape = small_train_inputs[0].shape

vgg16 = keras.applications.vgg16.VGG16(weights="imagenet",
                                       include_top=False,
                                       input_shape=input_shape)

small_num_classes = len(small_classes)

# This is where we use the Sequential API to create the model. Because
# include_top=False drops the original VGG16 classifier, we reuse all of
# the remaining (convolutional) layers and append a new fully connected
# head, ending with a softmax output layer for the small classes.
refined_model = keras.Sequential([keras.Input(shape=input_shape)] +
                                 vgg16.layers +
                                 [layers.Flatten(),
                                  layers.Dropout(0.5),
                                  layers.Dense(512, activation="tanh"),
                                  layers.Dropout(0.5),
                                  layers.Dense(small_num_classes, activation="softmax")])

# We freeze the weights of all VGG16 layers, so that only the layers of
# the new head are trained.
for i in range(0, len(vgg16.layers)):
    refined_model.layers[i].trainable = False

refined_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                      optimizer="adam",
                      metrics=["accuracy"])

refined_model.summary()
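# Optional sanity check (an added sketch, not part of the original
# pipeline): count the trainable parameters to confirm that only the new
# head will be updated during training. Uses standard Keras attributes
# (refined_model.trainable_weights, refined_model.count_params()).
n_trainable = int(np.sum([np.prod(w.shape) for w in refined_model.trainable_weights]))
print("trainable parameters: %d of %d" % (n_trainable, refined_model.count_params()))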
# Train the model (essentially, train the weights of the new head) on
# the "small" dataset.
train_size = 100   # this specifies the size of the "small" training set
epochs = 100
batch_size = 4

hist_1dense = refined_model.fit(small_train_inputs[0:train_size],
                                small_train_labels[0:train_size],
                                epochs=epochs,
                                batch_size=batch_size,
                                # validation_data=(small_test_inputs, small_test_labels)
                                )

test_loss, test_acc = refined_model.evaluate(small_test_inputs, small_test_labels, verbose=2)
print('\nTest accuracy: %.2f%%' % (test_acc * 100))

#refined_model.save('fashion_mnist_refined5_tsize20_epochs100.h5')
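#%% Optional: plot the training history (an added sketch). This assumes
# the History keys "loss" and "accuracy" recorded by model.fit above
# (metrics=["accuracy"]); matplotlib was imported but unused until here.
plt.figure()
plt.plot(hist_1dense.history["accuracy"], label="train accuracy")
plt.plot(hist_1dense.history["loss"], label="train loss")
plt.xlabel("epoch")
plt.legend()
plt.show()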