#%%
"""
Credits: This code is adapted from the textbook "Deep Learning with Python",
2nd Edition, by François Chollet.
"""

#%% This code should be run only once, to set aside 20% of the training set
# for use as a validation set.
# If this code is run multiple times, it will keep removing from the
# training set and adding to the validation set.
#
# If your data comes from the aclImdb.zip file posted on the class website,
# DO NOT use this code, as it has already been applied to this data.

import os
import pathlib
import random
import shutil


def make_validation_split(base_dir, categories=("neg", "pos"),
                          val_fraction: float = 0.2, seed: int = 1337) -> dict:
    """Move a fraction of each category's training files into a validation set.

    For every name in *categories*, files are taken from
    ``base_dir/train/<category>`` and moved to ``base_dir/val/<category>``.

    Args:
        base_dir: Dataset root containing the ``train`` directory.
        categories: Sub-directory names to split.
        val_fraction: Fraction (0..1) of each category's files to move.
        seed: Seed for the shuffle that selects the validation files.

    Returns:
        A dict mapping each category to the list of file names that were moved.

    Raises:
        ValueError: If *val_fraction* is outside ``[0, 1]``.
        FileNotFoundError: If a training category directory does not exist.
        FileExistsError: If a validation category directory already exists,
            i.e. the split appears to have been made already.
    """
    if not 0 <= val_fraction <= 1:
        raise ValueError(f"val_fraction must be in [0, 1], got {val_fraction!r}")

    root = pathlib.Path(base_dir)
    train_root = root / "train"
    val_root = root / "val"

    moved = {}
    for category in categories:
        # Read the training directory first so that a wrong base_dir fails
        # here, before any validation directories have been created.
        # os.listdir returns names in arbitrary order; sorting makes the
        # seeded shuffle pick the same files on every machine.
        files = sorted(os.listdir(train_root / category))

        # Deliberately no exist_ok: an existing directory means the split was
        # already done, and running again would shrink the training set more.
        os.makedirs(val_root / category)

        random.Random(seed).shuffle(files)
        num_val_samples = int(val_fraction * len(files))
        # Slice from an explicit start index: files[-0:] would be the whole
        # list, moving every file when num_val_samples is 0.
        val_files = files[len(files) - num_val_samples:]
        for fname in val_files:
            shutil.move(train_root / category / fname,
                        val_root / category / fname)
        moved[category] = val_files
    return moved


base_dir = pathlib.Path("../../../../../home/cse4392_data/20_text/aclImdb")
val_dir = base_dir / "val"
train_dir = base_dir / "train"

# Only touch the file system when executed as a script / notebook cell,
# never as a side effect of importing this file.
if __name__ == "__main__":
    make_validation_split(base_dir)