import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
# remove_items() and select_classes() come from utilities_300; a reference
# sketch of what they are assumed to do appears at the end of this script.
from utilities_300 import *

# load and preprocess dataset
dataset = keras.datasets.mnist
#dataset = keras.datasets.fashion_mnist

# the data, split between train and test sets
(train_inputs, train_labels), (test_inputs, test_labels) = dataset.load_data()
number_of_classes = np.max([np.max(train_labels), np.max(test_labels)]) + 1
input_shape = train_inputs[0].shape

# Scale images to the [0, 1] range
train_inputs = train_inputs.astype("float32") / 255
test_inputs = test_inputs.astype("float32") / 255

# Right now, train_inputs[i] (for any i) is a 28x28 2D array.
# The Conv2D layer requires each input to be a 3D array.
# We use the np.expand_dims function to convert each training input to
# a 3D array of shape (28, 28, 1), to make it compatible with the Conv2D layer.
train_inputs = np.expand_dims(train_inputs, -1)
test_inputs = np.expand_dims(test_inputs, -1)
print("train_inputs shape:", train_inputs.shape)
print(train_inputs.shape[0], "train samples")
print(test_inputs.shape[0], "test samples")

#%% Create "big" dataset and "small" dataset
original_train_inputs = train_inputs
original_train_labels = train_labels
original_test_inputs = test_inputs
original_test_labels = test_labels

# compile list of classes. For the MNIST dataset, this will give us a
# list of 10 class labels, from 0 to 9.
original_classes = list(np.union1d(np.unique(train_labels), np.unique(test_labels)))

# We will exclude all samples of "small classes" from the big training dataset.
#small_classes = [2,3]
small_classes = [1,3,5,7,9]
big_classes = remove_items(original_classes, small_classes)
num_big_classes = len(big_classes)
small_num_classes = len(small_classes)

(big_train_inputs, big_train_labels) = select_classes(original_train_inputs,
                                                      original_train_labels,
                                                      big_classes)
(big_test_inputs, big_test_labels) = select_classes(original_test_inputs,
                                                    original_test_labels,
                                                    big_classes)
(small_train_inputs, small_train_labels) = select_classes(original_train_inputs,
                                                          original_train_labels,
                                                          small_classes)
(small_test_inputs, small_test_labels) = select_classes(original_test_inputs,
                                                        original_test_labels,
                                                        small_classes)

#%%
# Train on big dataset, where all samples of "small classes" have been removed.
input_shape = train_inputs[0].shape

# create the model
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_big_classes, activation="softmax"),
    ]
)
model.summary()

batch_size = 128
epochs = 15
model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
              optimizer="adam",
              metrics=["accuracy"])

# train the model on the "big classes" dataset
model.fit(big_train_inputs, big_train_labels, epochs=epochs, batch_size=batch_size)
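# (Added check, not in the original script.) SparseCategoricalCrossentropy with a
# Dense(num_big_classes) output layer expects integer labels in [0, num_big_classes).
# Since big_classes = [0, 2, 4, 6, 8], training above only works if select_classes()
# remaps the kept labels to consecutive indices 0..k-1 -- an assumption about
# utilities_300, which is not shown here. Printing the remaining label values makes
# that assumption easy to verify.
print("big train label values:", np.unique(big_train_labels))
print("small train label values:", np.unique(small_train_labels))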
# test the model on the "big classes" test set.
test_loss, test_acc = model.evaluate(big_test_inputs, big_test_labels, verbose=2)
print('\nTest accuracy: %.2f' % (test_acc * 100))
# uncomment the line below to save the trained model; the transfer-learning cells
# further down load this file.
#model.save('mnist_5_15.h5')

#%% This part is just for sanity checking, to make sure that the accuracy we
# get using model.predict() is the same as the accuracy we get with
# model.evaluate()
predictions = model.predict(big_test_inputs)
classes = np.argmax(predictions, 1)
test_accuracy = np.mean(classes == big_test_labels)
print('\nTest accuracy: %.2f' % (test_accuracy * 100))

#%% Here we do transfer learning.
# This is the implementation that uses the Sequential API:
# - load the model that was trained on the "big" dataset
# - remove the previous output layer
# - add a new output layer
# - freeze all weights except the ones in the new output layer
# - train on small dataset

# load the model that was trained on the "big" dataset
model = keras.models.load_model('mnist_5_15.h5')
num_layers = len(model.layers)
input_shape = small_train_inputs[0].shape

# This is where we use the Sequential API to create the model.
# Notice that we create a list of layers, where we use all layers of the
# model except for the last one, and we add a new fully connected
# output layer.
refined_model = keras.Sequential([keras.Input(shape=input_shape)] +
                                 model.layers[0:num_layers-1] +
                                 [layers.Dense(small_num_classes, activation="softmax")])

# We freeze the weights of all layers except the ones in the (new) output layer.
num_layers = len(refined_model.layers)
for i in range(0, num_layers-1):
    refined_model.layers[i].trainable = False

refined_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                      optimizer="adam",
                      metrics=["accuracy"])
refined_model.summary()

# train the model (essentially, train the weights of the last layer) on the
# "small" dataset.
train_size = 100  # this specifies the size of the "small" training set
epochs = 100
batch_size = 4
hist_mnist_tl = refined_model.fit(small_train_inputs[0:train_size],
                                  small_train_labels[0:train_size],
                                  epochs=epochs, batch_size=batch_size,
                                  # validation_data=(small_test_inputs, small_test_labels)
                                  )

test_loss, test_acc = refined_model.evaluate(small_test_inputs, small_test_labels, verbose=2)
print('\nTest accuracy: %.2f' % (test_acc * 100))
#refined_model.save('mnist_refined5_tsize100_epochs100.h5')

#%% Compare to training on small dataset
input_shape = small_train_inputs[0].shape
train_size = 100  # this specifies the size of the "small" training set

small_model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(small_num_classes, activation="softmax"),
    ]
)
small_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                    optimizer="adam",
                    metrics=["accuracy"])
small_model.summary()

small_model.fit(small_train_inputs[0:train_size], small_train_labels[0:train_size],
                epochs=100, batch_size=4)

test_loss, test_acc = small_model.evaluate(small_test_inputs, small_test_labels, verbose=2)
print('\nTest accuracy: %.2f%%' % (test_acc * 100))
#small_model.save('fashion_mnist_small5_tsize20_epochs100.h5')
# results: 84-87% in five tries.
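#%% (Added sketch, not in the original script.) Plot the transfer-learning training
# curve captured in hist_mnist_tl by the Sequential-API transfer-learning cell above,
# using the matplotlib import from the top of the script. 'loss' and 'accuracy' are
# the history keys that model.fit() records when the model is compiled with
# metrics=["accuracy"].
plt.figure()
plt.plot(hist_mnist_tl.history['loss'], label='training loss')
plt.plot(hist_mnist_tl.history['accuracy'], label='training accuracy')
plt.xlabel('epoch')
plt.legend()
plt.title('Transfer learning (Sequential API), train_size = 100')
plt.show()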
#%% This is an alternative implementation of transfer learning that
# uses the Functional API instead of the Sequential API. We still
# follow these steps:
# - load the model that was trained on the "big" dataset
# - remove the previous output layer
# - add a new output layer
# - freeze all weights except the ones in the new output layer
# - train on small dataset

# load the model that was trained on the "big" dataset
model = keras.models.load_model('mnist_5_15.h5')
num_layers = len(model.layers)
# base_output is the output of the second-to-last layer of the loaded model,
# i.e. the Dropout layer that feeds the old classification head.
base_output = model.layers[num_layers-2].output
small_num_classes = len(small_classes)

# This is where we use the Functional API to create the model.
new_output = layers.Dense(small_num_classes, activation="softmax")(base_output)
refined_model = keras.models.Model(inputs=model.inputs, outputs=new_output)

# We freeze the weights of all layers except the ones in the (new) output layer.
# Note: in the Functional model, refined_model.layers[0] is the InputLayer, so this
# range also leaves the Dropout layer trainable; that is harmless, because neither
# the InputLayer nor Dropout has any weights.
for i in range(0, num_layers-1):
    refined_model.layers[i].trainable = False

refined_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                      optimizer="adam",
                      metrics=["accuracy"])
refined_model.summary()

# train the model (essentially, train the weights of the last layer) on the
# "small" dataset.
train_size = 100  # this specifies the size of the "small" training set
epochs = 100
batch_size = 4
refined_model.fit(small_train_inputs[0:train_size],
                  small_train_labels[0:train_size],
                  epochs=epochs, batch_size=batch_size)

test_loss, test_acc = refined_model.evaluate(small_test_inputs, small_test_labels, verbose=0)
print('\nTest accuracy: %.2f' % (test_acc * 100))
#refined_model.save('mnist_refined5_tsize20_epochs100.h5')
# results: 90.9-91.9% in ten trials

#%%
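#%% Reference sketch (added; NOT the actual utilities_300 code, which is not shown
# in this script). These are hypothetical implementations of remove_items() and
# select_classes(), written to match how they are used above. In particular,
# select_classes() is assumed to remap the selected labels to consecutive integers
# 0..k-1, because the models above use Dense(len(classes)) output layers with
# SparseCategoricalCrossentropy, which expects labels in that range.

def remove_items(items, items_to_remove):
    # Return a copy of items with every element of items_to_remove removed.
    return [x for x in items if x not in items_to_remove]

def select_classes(inputs, labels, classes):
    # Keep only the samples whose label is in classes, and remap each kept label
    # to its index in classes (0..len(classes)-1).
    mask = np.isin(labels, classes)
    selected_inputs = inputs[mask]
    remap = {c: i for i, c in enumerate(classes)}
    selected_labels = np.array([remap[y] for y in labels[mask]])
    return selected_inputs, selected_labels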