"""Standard CNN with data augmentation.""" import random import numpy as np from itertools import islice from icenet import util from keras.models import Model from keras.initializers import glorot_uniform from keras.optimizers import Adam from keras import backend as K from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler from keras.models import load_model from keras.layers import ( Input, Dense, Activation, Conv2D, BatchNormalization, Flatten, MaxPooling2D, Dropout, Concatenate, Reshape ) MODEL_NAME = 'icenet' def get_base(base_model_filepath, postfix, last_layer, freeze=True): base_model = load_model(base_model_filepath) for i,l in enumerate(base_model.layers[0:last_layer+1]): l.trainable = not (i == 0 or freeze) l.name = '%s_%s' % (l.name, postfix) #base_model.layers[last_layer].trainable = True # Should be avrg pool layer return base_model.input, base_model.layers[last_layer].output def get_model(base_model_filepath): print("Load base models ...") X_inputs, X_base_outputs = list(zip(*[ get_base(base_model_filepath, 'sec%d' % i, -5, freeze=True) for i in range(9) ])) print("Output layer:") print(X_base_outputs[0]) #X = Concatenate(axis=2)(list(X_base_outputs)) #X = Reshape((nw*3, nh*3, nc))(X) #_, n = K.int_shape(X_base_outputs[0]) _, nw, nh, nc = K.int_shape(X_base_outputs[0]) assert nw == 1 and nh == 1 and nc == 512 X = Concatenate(axis=-1)(list(X_base_outputs)) X = Reshape((3, 3, nc))(X) X = Conv2D(1024, kernel_size=(3, 3), strides=(1, 1), padding='valid', activation='relu', kernel_initializer=glorot_uniform(seed=0), name='conv_top')(X) #X = MaxPooling2D((3, 3))(X) X = Flatten()(X) X = Dropout(0.25)(X) X = Dense(1024, activation='relu', kernel_initializer=glorot_uniform(seed=0), name='fc1')(X) X = Dropout(0.25)(X) X = Dense(1, activation='sigmoid', kernel_initializer=glorot_uniform(seed=0), name='y_hat')(X) return Model(inputs=list(X_inputs), outputs=X, name=MODEL_NAME) def train(datadir): base_model_name = 'base_cnn' print("Load scaling from %s-scaling.csv" % base_model_name) minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name) print("Load samples from train.json ...") samples = util.load_samples(datadir, 'train.json') m_tot = len(samples) print("Got %d samples" % m_tot) split = 0.90 train_samples, dev_samples = util.train_dev_split(samples, split) m_train = len(train_samples) m_dev = len(dev_samples) print("Split train/test = %.2f" % split) print("Training samples: %d" % m_train) print("Dev samples: %d" % m_dev) print("First 5 dev samples ID's:") print(' '.join([s['id'] for s in dev_samples[0:5]])) # Extract dev_samples dev_generator = util.icenet_generator( dev_samples, minmax, m_dev, crop_offset=3, augment=False ) X_dev, Y_dev = list(islice(dev_generator, 1))[0] # Model + opt def lr_schedule(epoch): if epoch < 20: return 0.0005 elif epoch < 50: return 0.0002 elif epoch < 200: return 0.00005 else: return 0.00001 model = get_model(util.model_fp(datadir, '%s-weights-val_loss.h5' % base_model_name)) batch_size = 16 opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) model.compile( optimizer=opt, loss="binary_crossentropy", metrics = ["accuracy"] ) _summary = [] model.summary(print_fn=lambda x: _summary.append(x)) print('\n'.join(_summary[0:8])) print("...") print("...") print("...") print("...") print('\n'.join(_summary[-40:])) # Callbacks callbacks = [ ModelCheckpoint( filepath=util.model_fp(datadir, '%s-weights-val_loss.h5' % MODEL_NAME), verbose=1, monitor='val_loss', save_best_only=True ), ModelCheckpoint( 
        ModelCheckpoint(
            filepath=util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME),
            verbose=1,
            monitor='loss',
            save_best_only=True
        ),
        LearningRateScheduler(lr_schedule),
        EarlyStopping('loss', patience=50, mode="min")
    ]

    # TRAIN!
    model.fit_generator(
        util.icenet_generator(train_samples, minmax, batch_size, crop_offset=3),
        steps_per_epoch=int(2 * m_train / batch_size),
        epochs=1000,
        validation_data=(X_dev, Y_dev),
        callbacks=callbacks
    )

    score = model.evaluate(X_dev, Y_dev)
    print("")
    print("Dev loss: %.4f" % score[0])
    print("Dev accuracy: %.4f" % score[1])

    print("Make dev predictions ...")
    y_pred = model.predict(X_dev, batch_size=32)
    print("Write to %s-dev.csv" % MODEL_NAME)
    util.write_preds(datadir, '%s-dev.csv' % MODEL_NAME, dev_samples, y_pred)

    # Check how stable the predictions are under augmentation.
    n_test = 30
    print("Check invariance on %d random augmented dev samples" % n_test)
    for i in range(n_test):
        test_sample = random.choice(dev_samples)
        dev_generator = util.icenet_generator(
            [test_sample], minmax, 6 * 6 * 4 * 2, crop_offset=3, augment=True
        )
        X_dev, Y_dev = list(islice(dev_generator, 1))[0]
        y_pred = model.predict(X_dev, batch_size=32)
        print("Dev sample %s (is_iceberg=%s): Mean = %.4f, std = %.4f, min = %.4f, max = %.4f" % (
            test_sample.get('id'), test_sample.get('is_iceberg'),
            np.mean(y_pred), np.std(y_pred), np.min(y_pred), np.max(y_pred)
        ))


def predict(datadir):
    print("Load model from %s-weights-loss.h5 ..." % MODEL_NAME)
    model = load_model(util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME))

    # Load scaling factors
    base_model_name = 'base_cnn'
    print("Load scaling from %s-scaling.pkl" % base_model_name)
    minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name)

    target_fp = 'train.json'
    print("Load samples from %s ..." % target_fp)
    samples = util.load_samples(datadir, target_fp)
    m_tot = len(samples)
    print("Got %d samples" % m_tot)

    # Extract all samples with the generator in one batch
    data_gen = util.icenet_generator(
        samples, minmax, m_tot, crop_offset=3, augment=False
    )
    X, _ = list(islice(data_gen, 1))[0]
    print("X (image) shape:")
    print(X[0].shape)

    # Predict ...
    print("Predict!")
    y_pred = model.predict(X, batch_size=32)

    filename = '%s-%s.csv' % (MODEL_NAME, target_fp.split('.')[0])
    print("Write to %s" % filename)
    util.write_preds(datadir, filename, samples, y_pred)

    n_test = 20
    print("Check invariance on %d random augmented samples" % n_test)
    for i in range(n_test):
        test_sample = random.choice(samples)
        dev_generator = util.icenet_generator(
            [test_sample], minmax, 6 * 6 * 4 * 2, crop_offset=3, augment=True
        )
        X_dev, Y_dev = list(islice(dev_generator, 1))[0]
        y_pred = model.predict(X_dev, batch_size=32)
        print("Sample %s (is_iceberg=%s): Mean = %.4f, std = %.4f, min = %.4f, max = %.4f" % (
            test_sample.get('id'), test_sample.get('is_iceberg'),
            np.mean(y_pred), np.std(y_pred), np.min(y_pred), np.max(y_pred)
        ))


def _sigmoid(z):
    return 1.0 / (1 + np.exp(-z))


def stretch(z, factor=10):
    return _sigmoid(factor * (z - 0.5))
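
# Illustration (hypothetical values, not from the original code): averaging predictions
# over many augmented views tends to pull scores toward 0.5, while stretch() re-sharpens
# the averaged score around 0.5 without moving the decision boundary, e.g.
#   stretch(0.5)            == 0.5
#   stretch(0.6, factor=10) ~= sigmoid(1.0) ~= 0.73
#   stretch(0.9, factor=10) ~= sigmoid(4.0) ~= 0.98
# predict_avrg() below compares the log loss of the raw average against several
# stretch factors.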


def predict_avrg(datadir):
    print("Load model from %s-weights-loss.h5 ..." % MODEL_NAME)
    model = load_model(util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME))

    # Load scaling factors
    base_model_name = 'base_cnn'
    print("Load scaling from %s-scaling.pkl" % base_model_name)
    minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name)

    target_fp = 'train.json'
    print("Load samples from %s ..." % target_fp)
    samples = util.load_samples(datadir, target_fp)
    random.shuffle(samples)
    samples = samples[0:400]
    random.shuffle(samples)
    m_tot = len(samples)
    print("Got %d samples" % m_tot)

    n_test = 6 * 6 * 4 * 2
    y_pred_first = np.zeros((len(samples), 1))
    y_pred_avrg = np.zeros((len(samples), 1))
    y_pred_avrg_sig10 = np.zeros((len(samples), 1))
    y_pred_avrg_sig20 = np.zeros((len(samples), 1))
    y_pred_avrg_sig30 = np.zeros((len(samples), 1))
    y_pred_avrg_sig40 = np.zeros((len(samples), 1))
    y_pred_avrg_sig50 = np.zeros((len(samples), 1))
    y_reals = np.zeros((len(samples), 1))

    print("Average each sample over %d augmented versions" % n_test)
    for i, s in enumerate(samples):
        dev_generator = util.icenet_generator(
            [s], minmax, n_test, crop_offset=3, augment=True
        )
        X_dev, Y_dev = list(islice(dev_generator, 1))[0]
        y_pred = model.predict(X_dev, batch_size=n_test)

        y_pred_first[i, 0] = y_pred[0, 0]
        y_pred_avrg[i, 0] = np.mean(y_pred)
        # Re-sharpen the averaged prediction with the stretch factors named sig10..sig50.
        y_pred_avrg_sig10[i, 0] = stretch(y_pred_avrg[i, 0], factor=10)
        y_pred_avrg_sig20[i, 0] = stretch(y_pred_avrg[i, 0], factor=20)
        y_pred_avrg_sig30[i, 0] = stretch(y_pred_avrg[i, 0], factor=30)
        y_pred_avrg_sig40[i, 0] = stretch(y_pred_avrg[i, 0], factor=40)
        y_pred_avrg_sig50[i, 0] = stretch(y_pred_avrg[i, 0], factor=50)
        y_reals[i, 0] = s.get('is_iceberg', 0) * 1.0

        print("Sample %d: %s (iceberg=%s): Mean = %.4f, s(10) = %.4f, s(40) = %.4f, std = %.4f, min = %.4f, max = %.4f" % (
            i, s.get('id'), s.get('is_iceberg'),
            np.mean(y_pred), y_pred_avrg_sig10[i, 0], y_pred_avrg_sig40[i, 0],
            np.std(y_pred), np.min(y_pred), np.max(y_pred)
        ))

    print("'First' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_first))
    print("'Avrg' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg))
    print("'Avrg stretch(10)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig10))
    print("'Avrg stretch(20)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig20))
    print("'Avrg stretch(30)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig30))
    print("'Avrg stretch(40)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig40))
    print("'Avrg stretch(50)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig50))

    filename = '%s-%s-avrg.csv' % (MODEL_NAME, target_fp.split('.')[0])
    print("Write to %s" % filename)
    util.write_preds(datadir, filename, samples, y_pred_avrg)

    filename = '%s-%s-fst.csv' % (MODEL_NAME, target_fp.split('.')[0])
    print("Write to %s" % filename)
    util.write_preds(datadir, filename, samples, y_pred_first)

    filename = '%s-%s-s10.csv' % (MODEL_NAME, target_fp.split('.')[0])
    print("Write to %s" % filename)
    util.write_preds(datadir, filename, samples, y_pred_avrg_sig10)

    filename = '%s-%s-s20.csv' % (MODEL_NAME, target_fp.split('.')[0])
    print("Write to %s" % filename)
    util.write_preds(datadir, filename, samples, y_pred_avrg_sig20)

    filename = '%s-%s-s30.csv' % (MODEL_NAME, target_fp.split('.')[0])
    print("Write to %s" % filename)
    util.write_preds(datadir, filename, samples, y_pred_avrg_sig30)

    filename = '%s-%s-s40.csv' % (MODEL_NAME, target_fp.split('.')[0])
    print("Write to %s" % filename)
    util.write_preds(datadir, filename, samples, y_pred_avrg_sig40)

    filename = '%s-%s-s50.csv' % (MODEL_NAME, target_fp.split('.')[0])
    print("Write to %s" % filename)
    util.write_preds(datadir, filename, samples, y_pred_avrg_sig50)
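

# Hypothetical command-line entry point (a minimal sketch, not part of the original
# module): assumes the data directory is passed as the first argument and that the
# mode names map directly onto the functions defined above.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Train or run the icenet composite CNN.')
    parser.add_argument('datadir', help='Directory with train.json and saved model weights')
    parser.add_argument('mode', choices=['train', 'predict', 'predict_avrg'],
                        help='Which step to run')
    args = parser.parse_args()

    if args.mode == 'train':
        train(args.datadir)
    elif args.mode == 'predict':
        predict(args.datadir)
    else:
        predict_avrg(args.datadir)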