"""Standard CNN with data augmentation."""
import random

import numpy as np

from icenet import util
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
from keras.initializers import glorot_uniform
from keras.layers import Concatenate, Conv2D, Dense, Dropout, Flatten, Reshape
from keras.models import Model, load_model
from keras.optimizers import Adam

MODEL_NAME = 'icenet'

def get_base(base_model_filepath, postfix, last_layer, freeze=True):
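    """Load a pretrained base model and return its input tensor and the
    output of the layer at index `last_layer`.

    The same base model is loaded once per image section, so every layer is
    renamed with `postfix` to avoid name collisions in the combined graph.
    All layers up to and including `last_layer` are frozen when `freeze` is
    True; the input layer is always left non-trainable.
    """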
    base_model = load_model(base_model_filepath)
    for i, l in enumerate(base_model.layers[0:last_layer + 1]):
        l.trainable = not (i == 0 or freeze)
        l.name = '%s_%s' % (l.name, postfix)
    # base_model.layers[last_layer].trainable = True  # Should be avrg pool layer
    return base_model.input, base_model.layers[last_layer].output


def get_model(base_model_filepath):
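    """Assemble the full model: nine frozen base CNNs, one per image section.

    The base feature vectors are concatenated and reshaped into a 3x3 spatial
    grid, convolved once more, then classified through two dense layers into
    a single sigmoid output (probability of is_iceberg).
    """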
print("Load base models ...")
X_inputs, X_base_outputs = list(zip(*[
get_base(base_model_filepath, 'sec%d' % i, -5, freeze=True) for i in range(9)
]))
print("Output layer:")
print(X_base_outputs[0])
#X = Concatenate(axis=2)(list(X_base_outputs))
#X = Reshape((nw*3, nh*3, nc))(X)
#_, n = K.int_shape(X_base_outputs[0])
_, nw, nh, nc = K.int_shape(X_base_outputs[0])
assert nw == 1 and nh == 1 and nc == 512
X = Concatenate(axis=-1)(list(X_base_outputs))
X = Reshape((3, 3, nc))(X)
X = Conv2D(1024,
kernel_size=(3, 3),
strides=(1, 1),
padding='valid',
activation='relu',
kernel_initializer=glorot_uniform(seed=0),
name='conv_top')(X)
#X = MaxPooling2D((3, 3))(X)
X = Flatten()(X)
X = Dropout(0.25)(X)
X = Dense(1024,
activation='relu',
kernel_initializer=glorot_uniform(seed=0),
name='fc1')(X)
X = Dropout(0.25)(X)
X = Dense(1,
activation='sigmoid',
kernel_initializer=glorot_uniform(seed=0),
name='y_hat')(X)
return Model(inputs=list(X_inputs), outputs=X, name=MODEL_NAME)
def train(datadir):
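    """Train the combined model on the data in `datadir`, checkpointing the
    best weights by loss and val_loss, then report dev-set metrics."""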
    base_model_name = 'base_cnn'
    print("Load scaling from %s-scaling.pkl" % base_model_name)
    minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name)
    print("Load samples from train.json ...")
    samples = util.load_samples(datadir, 'train.json')
    m_tot = len(samples)
    print("Got %d samples" % m_tot)
    split = 0.90
    train_samples, dev_samples = util.train_dev_split(samples, split)
    m_train = len(train_samples)
    m_dev = len(dev_samples)
    print("Split train/dev = %.2f" % split)
    print("Training samples: %d" % m_train)
    print("Dev samples: %d" % m_dev)
    print("First 5 dev sample IDs:")
    print(' '.join([s['id'] for s in dev_samples[0:5]]))
    # Materialize the whole dev set as one unaugmented generator batch
    dev_generator = util.icenet_generator(
        dev_samples, minmax, m_dev, crop_offset=3,
        augment=False
    )
    X_dev, Y_dev = next(dev_generator)
    # Model + optimizer
    def lr_schedule(epoch):
        # Step-wise decay: start at 5e-4, end at 1e-5 after epoch 200
        if epoch < 20:
            return 0.0005
        elif epoch < 50:
            return 0.0002
        elif epoch < 200:
            return 0.00005
        else:
            return 0.00001
    model = get_model(util.model_fp(datadir, '%s-weights-val_loss.h5' % base_model_name))
    batch_size = 16
    # The initial lr is overridden by the LearningRateScheduler callback below
    opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(
        optimizer=opt,
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )
    # Print only the head and tail of the (very long) model summary
    _summary = []
    model.summary(print_fn=lambda x: _summary.append(x))
    print('\n'.join(_summary[0:8]))
    print('\n'.join(['...'] * 4))
    print('\n'.join(_summary[-40:]))
    # Callbacks
    callbacks = [
        ModelCheckpoint(
            filepath=util.model_fp(datadir, '%s-weights-val_loss.h5' % MODEL_NAME),
            verbose=1,
            monitor='val_loss',
            save_best_only=True
        ),
        ModelCheckpoint(
            filepath=util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME),
            verbose=1,
            monitor='loss',
            save_best_only=True
        ),
        LearningRateScheduler(lr_schedule),
        EarlyStopping(
            'loss',
            patience=50,
            mode="min"
        )
    ]
    # TRAIN! Each epoch sees ~2x the training set thanks to augmentation.
    model.fit_generator(
        util.icenet_generator(train_samples, minmax, batch_size, crop_offset=3),
        steps_per_epoch=int(2 * m_train / batch_size),
        epochs=1000,
        validation_data=(X_dev, Y_dev),
        callbacks=callbacks
    )
    score = model.evaluate(X_dev, Y_dev)
    print("")
    print("Dev loss: %.4f" % score[0])
    print("Dev accuracy: %.4f" % score[1])
    print("Make dev predictions ...")
    y_pred = model.predict(X_dev, batch_size=32)
    print("Write to %s-dev.csv" % MODEL_NAME)
    util.write_preds(datadir, '%s-dev.csv' % MODEL_NAME, dev_samples, y_pred)
    n_test = 30
    print("Check invariance on %d random augmented dev samples" % n_test)
    for i in range(n_test):
        test_sample = random.choice(dev_samples)
        # 6*6*4*2 = 288 augmented variants of the same sample
        dev_generator = util.icenet_generator(
            [test_sample], minmax, 6 * 6 * 4 * 2, crop_offset=3,
            augment=True
        )
        X_dev, Y_dev = next(dev_generator)
        y_pred = model.predict(X_dev, batch_size=32)
        print("Dev sample %s (is_iceberg=%s): Mean = %.4f, std = %.4f, min = %.4f, max = %.4f" % (
            test_sample.get('id'), test_sample.get('is_iceberg'),
            np.mean(y_pred), np.std(y_pred),
            np.min(y_pred), np.max(y_pred)
        ))


def predict(datadir):
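    """Predict on all samples in train.json using the best-loss weights and
    write the predictions to a CSV file."""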
print("Load model from %s-weights-loss.h5 ..." % MODEL_NAME)
model = load_model(util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME))
# Load scaling factors
base_model_name = 'base_cnn'
print("Load scaling from %s-scaling.pkl" % base_model_name)
minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name)
target_fp = 'train.json'
print("Load samples from %s..." % target_fp)
samples = util.load_samples(datadir, target_fp)
m_tot = len(samples)
print("Got %d samples" % m_tot)
# Extract samples with generator
data_gen = util.icenet_generator(
samples, minmax, m_tot, crop_offset=3,
augment=False
)
X, _= list(islice(data_gen, 1))[0]
print("X (image) shape:")
print(X[0].shape)
# Predict ...
print("Predict!")
y_pred = model.predict(X, batch_size=32)
filename = '%s-%s.csv' % (MODEL_NAME, target_fp.split('.')[0])
print("Write to %s" % filename)
util.write_preds(datadir, filename, samples, y_pred)
    n_test = 20
    print("Check invariance on %d random augmented samples" % n_test)
    for i in range(n_test):
        test_sample = random.choice(samples)
        dev_generator = util.icenet_generator(
            [test_sample], minmax, 6 * 6 * 4 * 2, crop_offset=3,
            augment=True
        )
        X_dev, Y_dev = next(dev_generator)
        y_pred = model.predict(X_dev, batch_size=32)
        print("Sample %s (is_iceberg=%s): Mean = %.4f, std = %.4f, min = %.4f, max = %.4f" % (
            test_sample.get('id'), test_sample.get('is_iceberg'),
            np.mean(y_pred), np.std(y_pred),
            np.min(y_pred), np.max(y_pred)
        ))


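# Prediction "stretching": push averaged probabilities away from 0.5 with a
# logistic centered at 0.5, e.g. stretch(0.6, factor=10) = sigmoid(1.0) ~ 0.73.
# Larger factors sharpen predictions toward 0 or 1.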
def _sigmoid(z):
    return 1.0 / (1 + np.exp(-z))


def stretch(z, factor=10):
    return _sigmoid(factor * (z - 0.5))


def predict_avrg(datadir):
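    """Average predictions over many augmented versions of each sample.

    For every sample, predict on 288 augmented variants, average, and also
    compute "stretched" versions of the averaged probability; compare the
    binary cross-entropy of each variant and write one CSV per variant.
    """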
print("Load model from %s-weights-loss.h5 ..." % MODEL_NAME)
model = load_model(util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME))
# Load scaling factors
base_model_name = 'base_cnn'
print("Load scaling from %s-scaling.pkl" % base_model_name)
minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name)
target_fp = 'train.json'
print("Load samples from %s..." % target_fp)
samples = util.load_samples(datadir, target_fp)
random.shuffle(samples)
samples = samples[0:400]
random.shuffle(samples)
m_tot = len(samples)
print("Got %d samples" % m_tot)
n_test = 6*6*4*2
y_pred_first = np.zeros((len(samples), 1))
y_pred_avrg = np.zeros((len(samples), 1))
y_pred_avrg_sig10 = np.zeros((len(samples), 1))
y_pred_avrg_sig20 = np.zeros((len(samples), 1))
y_pred_avrg_sig30 = np.zeros((len(samples), 1))
y_pred_avrg_sig40 = np.zeros((len(samples), 1))
y_pred_avrg_sig50 = np.zeros((len(samples), 1))
y_reals = np.zeros((len(samples), 1))
print("Average each sample over %d augmented versions" % n_test)
for i,s in enumerate(samples):
dev_generator = util.icenet_generator(
[s], minmax, n_test, crop_offset=3,
augment=True
)
X_dev, Y_dev = list(islice(dev_generator, 1))[0]
y_pred = model.predict(X_dev, batch_size=n_test)
y_pred_first[i,0] = y_pred[0,0]
y_pred_avrg[i,0] = np.mean(y_pred)
y_pred_avrg_sig10[i,0] = stretch(y_pred_avrg[i,0], factor=10)
y_pred_avrg_sig20[i,0] = stretch(y_pred_avrg[i,0], factor=11)
y_pred_avrg_sig30[i,0] = stretch(y_pred_avrg[i,0], factor=12)
y_pred_avrg_sig40[i,0] = stretch(y_pred_avrg[i,0], factor=13)
y_pred_avrg_sig50[i,0] = stretch(y_pred_avrg[i,0], factor=14)
y_reals[i,0] = s.get('is_iceberg', 0) * 1.0
print("Sample %d: %s (iceberg=%s): Mean = %.4f, s(10) = %.4f, s(40) = %.4f std = %.4f, min = %.4f, max = %.4f" % (
i, s.get('id'), s.get('is_iceberg'),
np.mean(y_pred),
y_pred_avrg_sig10[i,0], y_pred_avrg_sig40[i,0],
np.std(y_pred),
np.min(y_pred), np.max(y_pred)
))
print("'First' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_first))
print("'Avrg' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg))
print("'Avrg stretch(10)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig10))
print("'Avrg stretch(20)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig20))
print("'Avrg stretch(30)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig30))
print("'Avrg stretch(40)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig40))
print("'Avrg stretch(50)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig50))
filename = '%s-%s-avrg.csv' % (MODEL_NAME, target_fp.split('.')[0])
print("Write to %s" % filename)
util.write_preds(datadir, filename, samples, y_pred_avrg)
filename = '%s-%s-fst.csv' % (MODEL_NAME, target_fp.split('.')[0])
print("Write to %s" % filename)
util.write_preds(datadir, filename, samples, y_pred_first)
filename = '%s-%s-s10.csv' % (MODEL_NAME, target_fp.split('.')[0])
print("Write to %s" % filename)
util.write_preds(datadir, filename, samples, y_pred_avrg_sig10)
filename = '%s-%s-s20.csv' % (MODEL_NAME, target_fp.split('.')[0])
print("Write to %s" % filename)
util.write_preds(datadir, filename, samples, y_pred_avrg_sig20)
filename = '%s-%s-s30.csv' % (MODEL_NAME, target_fp.split('.')[0])
print("Write to %s" % filename)
util.write_preds(datadir, filename, samples, y_pred_avrg_sig30)
filename = '%s-%s-s40.csv' % (MODEL_NAME, target_fp.split('.')[0])
print("Write to %s" % filename)
util.write_preds(datadir, filename, samples, y_pred_avrg_sig40)
filename = '%s-%s-s50.csv' % (MODEL_NAME, target_fp.split('.')[0])
print("Write to %s" % filename)
util.write_preds(datadir, filename, samples, y_pred_avrg_sig50)
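

# Typical usage, assuming the base_cnn step has already written its weights
# and scaling files into the data directory (the exact entry point is up to
# the caller; these functions only need a `datadir` path):
#   train('data/')
#   predict_avrg('data/')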