"""Base CNN.""" import random import numpy as np from itertools import islice from icenet import util from icenet import residual from icenet import resnet from keras.models import Model from keras.initializers import glorot_uniform from keras.optimizers import Adam, SGD from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau from keras.models import load_model from keras.layers import ( Input, Add, Dense, Activation, Conv2D, BatchNormalization, Flatten, MaxPooling2D, AveragePooling2D, Dropout, Concatenate, ZeroPadding2D ) MODEL_NAME = 'base_cnn' def get_model_simple(img_shape): X_img = Input(img_shape) X = ZeroPadding2D((2, 2))(X_img) # Conv 1 X = Conv2D(64, kernel_size=(5, 5), strides=(1, 1), padding='valid', activation='relu', kernel_initializer=glorot_uniform(seed=0), name='conv1')(X) X = Conv2D(128, kernel_size=(3, 3), strides=(1, 1), padding='valid', activation='relu', kernel_initializer=glorot_uniform(seed=0), name='conv2')(X) X = MaxPooling2D((3, 3), strides=(2, 2))(X) X = Dropout(0.2)(X) X = Conv2D(128, kernel_size=(3, 3), strides=(1, 1), padding='valid', activation='relu', kernel_initializer=glorot_uniform(seed=0), name='conv3')(X) X = MaxPooling2D((2, 2), strides=(2, 2))(X) X = Dropout(0.2)(X) X = Conv2D(64, kernel_size=(3, 3), strides=(1, 1), padding='valid', activation='relu', kernel_initializer=glorot_uniform(seed=0), name='conv4')(X) X = MaxPooling2D((3, 3), strides=(1, 1))(X) X = Dropout(0.2)(X) X = Flatten()(X) X = Dense(512, activation='relu', kernel_initializer=glorot_uniform(seed=0), name='fc1')(X) X = Dropout(0.2)(X) X = Dense(256, activation='relu', kernel_initializer=glorot_uniform(seed=0), name='fc2')(X) X = Dropout(0.2)(X) X = Dense(3, activation='sigmoid', kernel_initializer=glorot_uniform(seed=0), name='y_hat')(X) return Model(inputs=X_img, outputs=X, name=MODEL_NAME) def get_model_residual(img_shape): X_img = Input(img_shape) X = ZeroPadding2D((2, 2))(X_img) # Conv 1 X = Conv2D(32, kernel_size=(5, 5), strides=(1, 1), padding='valid', kernel_initializer=glorot_uniform(seed=0), name='conv1')(X) X = BatchNormalization(axis=3, name='bn_conv1')(X) X = Activation('relu')(X) X = MaxPooling2D((2, 2), strides=(2, 2))(X) # Conv 2 (residual) X = residual.convolutional_block(X, 4, (32, 32, 128), 'stage2a', s=1) X = residual.identity_block(X, 4, (32, 32, 128), 'stage2b') #X = residual.identity_block(X, 4, (32, 32, 128), 'stage2c') # Conv 3 (residual) X = residual.convolutional_block(X, 3, (64, 64, 256), 'stage3a', s=2) X = residual.identity_block(X, 3, (64, 64, 256), 'stage3b') X = residual.identity_block(X, 3, (64, 64, 256), 'stage3c') X = residual.identity_block(X, 3, (64, 64, 256), 'stage3d') # Conv 4 (residual) X = residual.convolutional_block(X, 3, (128, 128, 512), 'stage4a', s=2) X = residual.identity_block(X, 3, (128, 128, 512), 'stage4b') X = residual.identity_block(X, 3, (128, 128, 512), 'stage4c') #X = residual.identity_block(X, 3, (128, 128, 512), 'stage4d') #X = residual.identity_block(X, 3, (128, 128, 512), 'stage4e') #X = AveragePooling2D(pool_size=(4, 4), name='avg_pool')(X) X = Conv2D(512, kernel_size=(4, 4), strides=(1, 1), padding='valid', kernel_initializer=glorot_uniform(seed=0), name='convend')(X) X = BatchNormalization(axis=3, name='bn_convend')(X) X = Activation('relu')(X) # Flatten X = Flatten()(X) X = Dense(3, activation='softmax', kernel_initializer=glorot_uniform(seed=0), name='y_hat')(X) return Model(inputs=X_img, outputs=X, name=MODEL_NAME) def get_model_res18(img_shape): X_img = Input(img_shape) X = ZeroPadding2D((2, 
def get_model_res18(img_shape):
    """ResNet-18-style model built from icenet.residual basic blocks."""
    X_img = Input(img_shape)
    X = ZeroPadding2D((2, 2))(X_img)

    # Conv 1
    X = Conv2D(64, kernel_size=(5, 5), strides=(1, 1), padding='valid',
               kernel_initializer=glorot_uniform(seed=0), name='conv1')(X)
    X = BatchNormalization(axis=3, name='bn_conv1')(X)
    X = Activation('relu')(X)

    # Conv 2 (residual)
    X = residual.basic_block(X, 3, (64, 64), 'stage2a')
    X = residual.basic_block(X, 3, (64, 64), 'stage2b')

    # Conv 3 (residual)
    X = residual.basic_block(X, 3, (128, 128), 'stage3a')
    X = residual.basic_block(X, 3, (128, 128), 'stage3b')

    # Conv 4 (residual)
    X = residual.basic_block(X, 3, (256, 256), 'stage4a')
    X = residual.basic_block(X, 3, (256, 256), 'stage4b')

    # Conv 5 (residual)
    X = residual.basic_block(X, 3, (512, 512), 'stage5a')
    X = residual.basic_block(X, 3, (512, 512), 'stage5b')

    # AveragePool
    X = AveragePooling2D(pool_size=(2, 2), name='avg_pool')(X)

    # Flatten
    X = Flatten()(X)
    X = Dense(3, activation='softmax',
              kernel_initializer=glorot_uniform(seed=0), name='y_hat')(X)

    return Model(inputs=X_img, outputs=X, name=MODEL_NAME)
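
# train() below leans on util.base_cnn_generator to stream scaled, augmented
# (X, Y) batches forever. A minimal sketch of the contract it is assumed to
# satisfy is given here: min/max scaling with the frozen training-set factors
# plus cheap random flips. The real generator (and its full augmentation set)
# lives in icenet.util; the 'bands'/'label' sample keys and the (lo, hi)
# structure of minmax are illustrative assumptions.
def _base_cnn_generator_sketch(samples, minmax, batch_size):
    lo, hi = minmax  # per-channel minima/maxima from the training set
    while True:
        batch = [random.choice(samples) for _ in range(batch_size)]
        X = np.stack([s['bands'] for s in batch])  # e.g. (m, 28, 28, 2)
        Y = np.stack([s['label'] for s in batch])  # assumed one-hot, (m, 3)
        X = (X - lo) / (hi - lo)                   # scale to [0, 1]
        if random.random() < 0.5:                  # augmentation: random flip
            X = X[:, :, ::-1, :]
        yield X, Y
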
def train(datadir):
    """Train the model on <datadir>/train.json and keep the best weights."""
    print("Load samples from train.json ...")
    samples = util.load_samples(datadir, 'train.json')
    m_tot = len(samples)
    print("Got %d samples" % m_tot)

    # Split the samples with the same seed every time so that the dev set is
    # "frozen". This makes it easier to monitor and compare different models.
    # When a good model is found, the dev set can then be removed entirely in
    # order to fully utilize the available training data.
    split = 0.90
    train_samples, dev_samples = util.train_dev_split(samples, split,
                                                      shuffle=True)
    m_train = len(train_samples)
    m_dev = len(dev_samples)
    print("Split train/dev = %.2f" % split)
    print("Training samples: %d" % m_train)
    print("Dev samples: %d" % m_dev)
    print("First 5 dev sample IDs:")
    print(' '.join([s['id'] for s in dev_samples[0:5]]))

    # The minimum/maximum values of the entire training set determine the
    # scaling factors. We store the scaling factors to file so that they can
    # be retrieved and re-used for predictions.
    minmax = util.get_minmax(train_samples)
    print("Write scaling factors to %s-scaling.pkl" % MODEL_NAME)
    util.save_minmax(datadir, '%s-scaling.pkl' % MODEL_NAME, minmax)

    # Since we make heavy use of augmentation here, we can also augment the
    # dev set (x10) to smooth out validation losses during training.
    m_dev *= 10
    print("Dev samples (augmented): %d" % m_dev)
    dev_generator = util.base_cnn_generator(dev_samples, minmax, m_dev)
    # Pull a single augmented batch that covers the whole dev set.
    X_dev, Y_dev = next(dev_generator)

    # Model + optimization parameters. Note that ResnetBuilder expects a
    # channels-first (channels, rows, cols) input shape.
    #model = get_model_res18((28, 28, 2))
    model = resnet.ResnetBuilder.build_resnet_18((2, 28, 28), 3)
    batch_size = 32
    #opt = Adam(lr=0.0002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    opt = SGD(lr=0.005, momentum=0.9, decay=1e-5)
    model.compile(
        optimizer=opt,
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    model.summary()

    # Callbacks: keep the best weights by val_loss and by loss, shrink the
    # learning rate on plateaus, and stop early when the loss stalls.
    callbacks = [
        ModelCheckpoint(
            filepath=util.model_fp(datadir,
                                   '%s-weights-val_loss.h5' % MODEL_NAME),
            verbose=1,
            monitor='val_loss',
            save_best_only=True
        ),
        ModelCheckpoint(
            filepath=util.model_fp(datadir,
                                   '%s-weights-loss.h5' % MODEL_NAME),
            verbose=1,
            monitor='loss',
            save_best_only=True
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.1,
            verbose=1,
            patience=8,
            epsilon=0.005,
            mode='min',
            min_lr=1e-7
        ),
        EarlyStopping(monitor='loss', patience=30, mode='min')
    ]

    # TRAIN! Each epoch covers the training set roughly 4 times, which the
    # augmentation in the generator makes worthwhile.
    model.fit_generator(
        util.base_cnn_generator(train_samples, minmax, batch_size),
        steps_per_epoch=int(4 * m_train / batch_size),
        epochs=1000,
        validation_data=(X_dev, Y_dev),
        callbacks=callbacks
    )

    score = model.evaluate(X_dev, Y_dev)
    print("")
    print("Dev loss: %.4f" % score[0])
    print("Dev accuracy: %.4f" % score[1])
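
# Hypothetical entry point, for illustration only: the data directory below
# is a placeholder, and the real project presumably wires train() up
# elsewhere (e.g. from a CLI script).
if __name__ == '__main__':
    train('data')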