#%% """ Credits: This code is adapted from the textbook "Deep Learning with Python", 2nd Edition, by François Chollet. """ !wget https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip !unzip jena_climate_2009_2016.csv.zip #%% import numpy as np from tensorflow import keras from matplotlib import pyplot as plt from timeseries_code import * fname = "jena_climate_2009_2016.csv" with open(fname) as f: data = f.read() lines = data.split("\n") header = lines[0].split(",") lines = lines[1:] print(header) print(len(lines)) temperature = np.zeros((len(lines),)) raw_data = np.zeros((len(lines), len(header) - 1)) for i, line in enumerate(lines): values = [float(x) for x in line.split(",")[1:]] temperature[i] = values[1] # raw_data[i, :] = values[:] raw_data[i] = values #%% Shorter version of previous chunk of code fname = "jena_climate_2009_2016.csv" with open(fname) as f: data = f.read() lines = data.split("\n") lines = lines[1:] # The first line in the file is header information temperature = np.zeros((len(lines),)) raw_data = np.zeros((len(lines), len(header) - 1)) for i, line in enumerate(lines): values = [float(x) for x in line.split(",")[1:]] temperature[i] = values[1] raw_data[i] = values #%% plot the temperature over the entire 8 years of data plt.plot(range(0, len(temperature)), temperature) #%% plot the temperature over the first 10 days of observations plt.plot(range(0, 1440), temperature[:1440]) #%% num_train_samples = int(0.5 * len(raw_data)) num_val_samples = int(0.25 * len(raw_data)) num_test_samples = len(raw_data) - num_train_samples - num_val_samples print("num_train_samples:", num_train_samples) print("num_val_samples:", num_val_samples) print("num_test_samples:", num_test_samples) #%% Feature normalization so that each feature has mean 0 and std 1. (number, dimensions) = raw_data.shape normalized_data = np.zeros((number, dimensions)) for d in range (0, dimensions): feature_values = raw_data[0:num_train_samples,d] m = np.mean(feature_values) s = np.std(feature_values) normalized_data[:,d] = (raw_data[:,d] - m) / s #%% Alternative way to do the previous feature normalization, # using numpy shortcuts. 
#%%
num_train_samples = int(0.5 * len(normalized_data))
training_data = normalized_data[:num_train_samples, :]
training_temperature = temperature[:num_train_samples]
print(training_data.shape)
# random_input_v1 returns one (window, target) pair; random_input additionally
# subsamples the window (every 6th record here) and returns the window's
# start index as a third value.
(input1, target1) = random_input_v1(training_data, 720, 24*6, training_temperature)
(input2, target2, s2) = random_input(training_data, 720, 24*6, training_temperature, 6)

#%%
training_data = normalized_data[:num_train_samples, :]
time_step_length = 720   # each input window covers 720 records (5 days)
target_steps = 24*6      # predict the temperature 24 hours past the window
sampling = 6             # keep one record per hour
dataset_size = 50000
print(training_data.shape)
(training_inputs, training_targets) = example_set(
    training_data, dataset_size, time_step_length, target_steps,
    training_temperature, sampling)
print("\ntraining time range: (%d, %d)" % (0, num_train_samples))
print("training_inputs.shape:", training_inputs.shape)
print("training_targets.shape:", training_targets.shape)

validation_start = num_train_samples
validation_end = validation_start + num_val_samples
validation_data = normalized_data[validation_start:validation_end, :]
validation_temperature = temperature[validation_start:validation_end]
(validation_inputs, validation_targets) = example_set(
    validation_data, dataset_size, time_step_length, target_steps,
    validation_temperature, sampling)
print("\nvalidation time range: (%d, %d)" % (validation_start, validation_end))
print("validation_inputs.shape:", validation_inputs.shape)
print("validation_targets.shape:", validation_targets.shape)

test_start = validation_end
test_end = test_start + num_test_samples
test_data = normalized_data[test_start:test_end, :]
test_temperature = temperature[test_start:test_end]
(test_inputs, test_targets) = example_set(
    test_data, dataset_size, time_step_length, target_steps,
    test_temperature, sampling)
print("\ntest time range: (%d, %d)" % (test_start, test_end))
print("test_inputs.shape:", test_inputs.shape)
print("test_targets.shape:", test_targets.shape)
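#%% naive_forecast and evaluate_naive_forecast also come from
# timeseries_code. A minimal sketch, assuming the textbook's common-sense
# baseline: predict that the temperature 24 hours from now equals the last
# temperature in the input window. Column 1 of the normalized input is the
# temperature, so it is de-normalized with mean[1] and std[1] (the same
# conversion used in the plotting cell below). Running this cell overrides
# the imported versions.

def naive_forecast(inputs, mean, std):
    # Last temperature reading in the window, converted back to degrees C.
    return inputs[-1, 1] * std[1] + mean[1]

def evaluate_naive_forecast(inputs, targets, mean, std):
    # Mean absolute error of the naive forecast over an entire example set.
    predictions = inputs[:, -1, 1] * std[1] + mean[1]
    return np.mean(np.abs(predictions - targets))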
#%% Sample one random input window and compare the naive forecast
# to the true target temperature.
(input2, target2, s) = random_input(training_data, 720, 24*6,
                                    training_temperature, 6)
# De-normalize column 1 (temperature) before plotting, so the y-axis
# is in degrees Celsius.
plt.plot(range(input2.shape[0]), input2[:, 1]*std[1] + mean[1])
prediction = naive_forecast(input2, mean, std)
print("prediction = %.2f\ntrue value = %.2f" % (prediction, target2))
print(s)

#%% Baseline MAE of the naive forecast on all three splits.
training_mae = evaluate_naive_forecast(training_inputs, training_targets, mean, std)
print("training MAE: %.2f" % (training_mae))
validation_mae = evaluate_naive_forecast(validation_inputs, validation_targets, mean, std)
print("validation MAE: %.2f" % (validation_mae))
test_mae = evaluate_naive_forecast(test_inputs, test_targets, mean, std)
print("test MAE: %.2f" % (test_mae))

#%% A small densely connected model: flatten the input window and
# apply two Dense layers.
input_shape = training_inputs[0].shape
model = keras.Sequential([
    keras.Input(shape=input_shape),
    keras.layers.Flatten(),
    keras.layers.Dense(16, activation="relu"),
    keras.layers.Dense(1),
])
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
# Save the model with the lowest validation loss seen during training.
callbacks = [keras.callbacks.ModelCheckpoint("jena_dense1_16.keras",
                                             save_best_only=True)]
history_dense = model.fit(training_inputs, training_targets,
                          epochs=50,
                          validation_data=(validation_inputs, validation_targets),
                          callbacks=callbacks)

val_mae = np.array(history_dense.history["val_mae"])
min_val_mae = val_mae.min()
min_epoch = val_mae.argmin() + 1
print("\nSmallest validation MAE: %.2f, reached in epoch %d" % (min_val_mae, min_epoch))

# Reload the best checkpoint and evaluate it on the test set.
model = keras.models.load_model("jena_dense1_16.keras")
(loss, test_mae) = model.evaluate(test_inputs, test_targets, verbose=0)
print("Test MAE: %.2f" % (test_mae))

#%% Plot training and validation MAE per epoch for the dense model.
mae = history_dense.history["mae"]
val_mae = history_dense.history["val_mae"]
epochs = range(1, len(mae) + 1)
plt.figure()
plt.plot(epochs, mae, "bo", label="Training MAE")
plt.plot(epochs, val_mae, "b", label="Validation MAE")
plt.title("Training and validation MAE")
plt.legend()
plt.show()

#%% A recurrent model: a single LSTM layer followed by a Dense output layer.
input_shape = training_inputs[0].shape
model = keras.Sequential([
    keras.Input(shape=input_shape),
    keras.layers.LSTM(16),
    keras.layers.Dense(1),
])
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
callbacks = [keras.callbacks.ModelCheckpoint("jena_lstm1_16.keras",
                                             save_best_only=True)]
history_lstm = model.fit(training_inputs, training_targets,
                         epochs=20,
                         validation_data=(validation_inputs, validation_targets),
                         callbacks=callbacks)

#%% Plot training and validation MAE per epoch for the LSTM model.
mae = history_lstm.history["mae"]
val_mae = history_lstm.history["val_mae"]
epochs = range(1, len(mae) + 1)
plt.figure()
plt.plot(epochs, mae, "bo", label="Training MAE")
plt.plot(epochs, val_mae, "b", label="Validation MAE")
plt.title("Training and validation MAE")
plt.legend()
plt.show()
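#%% For symmetry with the dense model, the best LSTM checkpoint can be
# reloaded and scored on the test set (same pattern as used above).
model = keras.models.load_model("jena_lstm1_16.keras")
(loss, test_mae) = model.evaluate(test_inputs, test_targets, verbose=0)
print("Test MAE: %.2f" % (test_mae))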