"""Standard CNN with data augmentation."""
|
|
|
|
import random
|
|
import numpy as np
|
|
from itertools import islice
|
|
from icenet import util
|
|
|
|
from keras.models import Model
|
|
from keras.initializers import glorot_uniform
|
|
from keras.optimizers import Adam
|
|
from keras import backend as K
|
|
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
|
|
from keras.models import load_model
|
|
from keras.layers import (
|
|
Input, Dense, Activation, Conv2D, BatchNormalization, Flatten,
|
|
MaxPooling2D, Dropout, Concatenate, Reshape
|
|
)
|
|
|
|
|
|
MODEL_NAME = 'icenet'
|
|
|
|
|
|
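
# Helper API expected from icenet.util, as inferred from the call sites in
# this module (exact signatures live in that package):
#   load_minmax(datadir, fname)       -- load per-channel scaling (pickle)
#   load_samples(datadir, fname)      -- load sample dicts from a JSON file
#   train_dev_split(samples, split)   -- split samples into train/dev lists
#   icenet_generator(samples, minmax, batch_size, crop_offset, augment)
#                                     -- yield (inputs, labels) batches; inputs
#                                        is a list of nine patch arrays
#   model_fp(datadir, fname)          -- filesystem path for a model file
#   write_preds(datadir, fname, samples, y_pred) -- write a predictions CSV
#   binary_crossentropy(y_true, y_pred) -- scalar log loss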


def get_base(base_model_filepath, postfix, last_layer, freeze=True):
    """Load a pretrained base CNN; return its input tensor and the output of
    layer `last_layer`.

    Layers are renamed with `postfix` so that several copies of the base
    model can coexist in one graph. Layers up to `last_layer` are frozen
    unless `freeze` is False; the input layer is always non-trainable.
    """
    base_model = load_model(base_model_filepath)
    for i, l in enumerate(base_model.layers[0:last_layer + 1]):
        l.trainable = not (i == 0 or freeze)
        l.name = '%s_%s' % (l.name, postfix)
    # base_model.layers[last_layer].trainable = True  # Should be the avg pool layer
    return base_model.input, base_model.layers[last_layer].output


def get_model(base_model_filepath):
    """Assemble the composite model: nine frozen copies of the base CNN, one
    per image section, followed by a trainable classification head."""

    print("Load base models ...")
    X_inputs, X_base_outputs = list(zip(*[
        get_base(base_model_filepath, 'sec%d' % i, -5, freeze=True) for i in range(9)
    ]))
    print("Output layer:")
    print(X_base_outputs[0])

    # Each base model emits a 1x1x512 feature map for its section.
    _, nw, nh, nc = K.int_shape(X_base_outputs[0])
    assert nw == 1 and nh == 1 and nc == 512
    X = Concatenate(axis=-1)(list(X_base_outputs))
    X = Reshape((3, 3, nc))(X)
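
    # Tiling trick: the nine 512-d section features are concatenated along the
    # channel axis and reshaped (row-major) into a 3x3x512 grid, one cell per
    # section, so the 3x3 convolution below mixes neighbouring sections.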
    X = Conv2D(1024,
               kernel_size=(3, 3),
               strides=(1, 1),
               padding='valid',
               activation='relu',
               kernel_initializer=glorot_uniform(seed=0),
               name='conv_top')(X)

    X = Flatten()(X)
    X = Dropout(0.25)(X)
    X = Dense(1024,
              activation='relu',
              kernel_initializer=glorot_uniform(seed=0),
              name='fc1')(X)
    X = Dropout(0.25)(X)

    X = Dense(1,
              activation='sigmoid',
              kernel_initializer=glorot_uniform(seed=0),
              name='y_hat')(X)

    return Model(inputs=list(X_inputs), outputs=X, name=MODEL_NAME)
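

# Minimal smoke test for the assembled model (a sketch; the weights filename
# is an example -- any base CNN saved by the base model's training run works):
#
#   model = get_model('base_cnn-weights-val_loss.h5')
#   dummy = [np.zeros((1,) + K.int_shape(inp)[1:]) for inp in model.inputs]
#   print(model.predict(dummy).shape)  # expected: (1, 1)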


def train(datadir):
    """Train the composite model and report dev-set performance."""
    base_model_name = 'base_cnn'
    print("Load scaling from %s-scaling.pkl" % base_model_name)
    minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name)

    print("Load samples from train.json ...")
    samples = util.load_samples(datadir, 'train.json')
    m_tot = len(samples)
    print("Got %d samples" % m_tot)

    split = 0.90
    train_samples, dev_samples = util.train_dev_split(samples, split)
    m_train = len(train_samples)
    m_dev = len(dev_samples)
    print("Split train/dev = %.2f" % split)
    print("Training samples: %d" % m_train)
    print("Dev samples: %d" % m_dev)
    print("First 5 dev sample IDs:")
    print(' '.join([s['id'] for s in dev_samples[0:5]]))

    # Extract the dev set as a single fixed (un-augmented) batch.
    dev_generator = util.icenet_generator(
        dev_samples, minmax, m_dev, crop_offset=3,
        augment=False
    )
    X_dev, Y_dev = list(islice(dev_generator, 1))[0]

    # Model + optimizer. LearningRateScheduler calls lr_schedule once per
    # epoch (with the epoch index) and sets the optimizer's learning rate.
    def lr_schedule(epoch):
        if epoch < 20:
            return 0.0005
        elif epoch < 50:
            return 0.0002
        elif epoch < 200:
            return 0.00005
        else:
            return 0.00001

    model = get_model(util.model_fp(datadir, '%s-weights-val_loss.h5' % base_model_name))
    batch_size = 16
    opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(
        optimizer=opt,
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )

    # Print an abridged summary (the full one is very long with 9 base models).
    _summary = []
    model.summary(print_fn=lambda x: _summary.append(x))
    print('\n'.join(_summary[0:8]))
    print("...")
    print('\n'.join(_summary[-40:]))

    # Callbacks: checkpoint on best dev loss and best train loss, apply the
    # LR schedule, and stop early if train loss plateaus for 50 epochs.
    callbacks = [
        ModelCheckpoint(
            filepath=util.model_fp(datadir, '%s-weights-val_loss.h5' % MODEL_NAME),
            verbose=1,
            monitor='val_loss',
            save_best_only=True
        ),
        ModelCheckpoint(
            filepath=util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME),
            verbose=1,
            monitor='loss',
            save_best_only=True
        ),
        LearningRateScheduler(lr_schedule),
        EarlyStopping(
            'loss',
            patience=50,
            mode="min"
        )
    ]

    # Train. steps_per_epoch is 2*m_train/batch_size, i.e. two augmented
    # passes over the training set per reported epoch.
    model.fit_generator(
        util.icenet_generator(train_samples, minmax, batch_size, crop_offset=3),
        steps_per_epoch=int(2 * m_train / batch_size),
        epochs=1000,
        validation_data=(X_dev, Y_dev),
        callbacks=callbacks
    )

    score = model.evaluate(X_dev, Y_dev)
    print("")
    print("Dev loss: %.4f" % score[0])
    print("Dev accuracy: %.4f" % score[1])

    print("Make dev predictions ...")
    y_pred = model.predict(X_dev, batch_size=32)
    print("Write to %s-dev.csv" % MODEL_NAME)
    util.write_preds(datadir, '%s-dev.csv' % MODEL_NAME, dev_samples, y_pred)

    n_test = 30
    print("Check invariance on %d random augmented dev samples" % n_test)
    for i in range(n_test):
        test_sample = random.choice(dev_samples)
        dev_generator = util.icenet_generator(
            [test_sample], minmax, 6 * 6 * 4 * 2, crop_offset=3,
            augment=True
        )
        X_dev, Y_dev = list(islice(dev_generator, 1))[0]
        y_pred = model.predict(X_dev, batch_size=32)
        print("Dev sample %s (is_iceberg=%s): Mean = %.4f, std = %.4f, min = %.4f, max = %.4f" % (
            test_sample.get('id'), test_sample.get('is_iceberg'),
            np.mean(y_pred), np.std(y_pred),
            np.min(y_pred), np.max(y_pred)
        ))
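

# Note: the batch size 6*6*4*2 = 288 used for the invariance checks (and in
# predict_avrg below) presumably enumerates the generator's augmentation grid
# (crop offsets x rotations x flips); the exact factorization is an assumption
# about util.icenet_generator rather than a documented contract.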


def predict(datadir):
    """Predict with the best-train-loss weights and write predictions to CSV."""
    print("Load model from %s-weights-loss.h5 ..." % MODEL_NAME)
    model = load_model(util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME))

    # Load scaling factors
    base_model_name = 'base_cnn'
    print("Load scaling from %s-scaling.pkl" % base_model_name)
    minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name)

    target_fp = 'train.json'
    print("Load samples from %s ..." % target_fp)
    samples = util.load_samples(datadir, target_fp)
    m_tot = len(samples)
    print("Got %d samples" % m_tot)

    # Extract all samples as one fixed (un-augmented) batch.
    data_gen = util.icenet_generator(
        samples, minmax, m_tot, crop_offset=3,
        augment=False
    )
    X, _ = list(islice(data_gen, 1))[0]
    print("X (image) shape:")
    print(X[0].shape)

    # Predict ...
    print("Predict!")
    y_pred = model.predict(X, batch_size=32)

    filename = '%s-%s.csv' % (MODEL_NAME, target_fp.split('.')[0])
    print("Write to %s" % filename)
    util.write_preds(datadir, filename, samples, y_pred)

    n_test = 20
    print("Check invariance on %d random augmented samples" % n_test)
    for i in range(n_test):
        test_sample = random.choice(samples)
        dev_generator = util.icenet_generator(
            [test_sample], minmax, 6 * 6 * 4 * 2, crop_offset=3,
            augment=True
        )
        X_dev, Y_dev = list(islice(dev_generator, 1))[0]
        y_pred = model.predict(X_dev, batch_size=32)
        print("Sample %s (is_iceberg=%s): Mean = %.4f, std = %.4f, min = %.4f, max = %.4f" % (
            test_sample.get('id'), test_sample.get('is_iceberg'),
            np.mean(y_pred), np.std(y_pred),
            np.min(y_pred), np.max(y_pred)
        ))


def _sigmoid(z):
    return 1.0 / (1 + np.exp(-z))


def stretch(z, factor=10):
    """Sharpen a probability: push values away from 0.5 towards 0 or 1."""
    return _sigmoid(factor * (z - 0.5))
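
# Worked example: averaging over many augmentations pulls predictions towards
# 0.5; stretch() partially undoes that compression:
#   stretch(0.5)            == 0.5
#   stretch(0.6, factor=10) == _sigmoid(1.0) ~= 0.7311
#   stretch(0.9, factor=10) == _sigmoid(4.0) ~= 0.9820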


def predict_avrg(datadir):
    """Average predictions over augmented versions of each sample and compare
    calibration variants (first prediction, raw average, stretched averages)."""
    print("Load model from %s-weights-loss.h5 ..." % MODEL_NAME)
    model = load_model(util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME))

    # Load scaling factors
    base_model_name = 'base_cnn'
    print("Load scaling from %s-scaling.pkl" % base_model_name)
    minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name)

    target_fp = 'train.json'
    print("Load samples from %s ..." % target_fp)
    samples = util.load_samples(datadir, target_fp)
    # Evaluate on a random subset of 400 samples.
    random.shuffle(samples)
    samples = samples[0:400]
    m_tot = len(samples)
    print("Got %d samples" % m_tot)

    n_test = 6 * 6 * 4 * 2
    y_pred_first = np.zeros((len(samples), 1))
    y_pred_avrg = np.zeros((len(samples), 1))
    y_pred_avrg_sig10 = np.zeros((len(samples), 1))
    y_pred_avrg_sig20 = np.zeros((len(samples), 1))
    y_pred_avrg_sig30 = np.zeros((len(samples), 1))
    y_pred_avrg_sig40 = np.zeros((len(samples), 1))
    y_pred_avrg_sig50 = np.zeros((len(samples), 1))
    y_reals = np.zeros((len(samples), 1))
    print("Average each sample over %d augmented versions" % n_test)
    for i, s in enumerate(samples):
        dev_generator = util.icenet_generator(
            [s], minmax, n_test, crop_offset=3,
            augment=True
        )
        X_dev, Y_dev = list(islice(dev_generator, 1))[0]
        y_pred = model.predict(X_dev, batch_size=n_test)

        y_pred_first[i, 0] = y_pred[0, 0]
        y_pred_avrg[i, 0] = np.mean(y_pred)
        y_pred_avrg_sig10[i, 0] = stretch(y_pred_avrg[i, 0], factor=10)
        y_pred_avrg_sig20[i, 0] = stretch(y_pred_avrg[i, 0], factor=20)
        y_pred_avrg_sig30[i, 0] = stretch(y_pred_avrg[i, 0], factor=30)
        y_pred_avrg_sig40[i, 0] = stretch(y_pred_avrg[i, 0], factor=40)
        y_pred_avrg_sig50[i, 0] = stretch(y_pred_avrg[i, 0], factor=50)
        y_reals[i, 0] = s.get('is_iceberg', 0) * 1.0
print("Sample %d: %s (iceberg=%s): Mean = %.4f, s(10) = %.4f, s(40) = %.4f std = %.4f, min = %.4f, max = %.4f" % (
|
|
i, s.get('id'), s.get('is_iceberg'),
|
|
np.mean(y_pred),
|
|
y_pred_avrg_sig10[i,0], y_pred_avrg_sig40[i,0],
|
|
np.std(y_pred),
|
|
np.min(y_pred), np.max(y_pred)
|
|
))
|
|
|
|
print("'First' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_first))
|
|
print("'Avrg' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg))
|
|
print("'Avrg stretch(10)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig10))
|
|
print("'Avrg stretch(20)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig20))
|
|
print("'Avrg stretch(30)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig30))
|
|
print("'Avrg stretch(40)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig40))
|
|
print("'Avrg stretch(50)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig50))
|
|
|
|

    # Write one CSV per prediction variant.
    target = target_fp.split('.')[0]
    variants = [
        ('avrg', y_pred_avrg),
        ('fst', y_pred_first),
        ('s10', y_pred_avrg_sig10),
        ('s20', y_pred_avrg_sig20),
        ('s30', y_pred_avrg_sig30),
        ('s40', y_pred_avrg_sig40),
        ('s50', y_pred_avrg_sig50),
    ]
    for suffix, preds in variants:
        filename = '%s-%s-%s.csv' % (MODEL_NAME, target, suffix)
        print("Write to %s" % filename)
        util.write_preds(datadir, filename, samples, preds)
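

if __name__ == '__main__':
    # Hypothetical entry point (a sketch -- the repo may wire these functions
    # up elsewhere): python icenet.py <datadir> [train|predict|predict_avrg]
    import sys
    datadir = sys.argv[1]
    action = sys.argv[2] if len(sys.argv) > 2 else 'train'
    {'train': train, 'predict': predict, 'predict_avrg': predict_avrg}[action](datadir)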