From 9a79d162938b60a0102637786cf1564e6aac92df Mon Sep 17 00:00:00 2001 From: Michael Soukup Date: Sat, 16 Oct 2021 08:49:48 +0200 Subject: [PATCH] Revive old code. --- .dockerignore | 3 + .gitignore | 4 + Dockerfile | 12 ++ README.md | 62 +++++++++ VERSION | 1 + icenet/__init__.py | 12 ++ icenet/base_cnn.py | 280 +++++++++++++++++++++++++++++++++++++ icenet/icenet.py | 332 ++++++++++++++++++++++++++++++++++++++++++++ icenet/residual.py | 190 +++++++++++++++++++++++++ icenet/resnet.py | 252 +++++++++++++++++++++++++++++++++ icenet/util.py | 229 ++++++++++++++++++++++++++++++ plot.py | 104 ++++++++++++++ plot_augmented.py | 133 ++++++++++++++++++ run_model.py | 29 ++++ run_model_docker.sh | 5 + show_predicted.sh | 11 ++ 16 files changed, 1659 insertions(+) create mode 100644 .dockerignore create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 README.md create mode 100644 VERSION create mode 100644 icenet/__init__.py create mode 100644 icenet/base_cnn.py create mode 100644 icenet/icenet.py create mode 100644 icenet/residual.py create mode 100644 icenet/resnet.py create mode 100644 icenet/util.py create mode 100755 plot.py create mode 100755 plot_augmented.py create mode 100755 run_model.py create mode 100755 run_model_docker.sh create mode 100755 show_predicted.sh diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..2024641 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +data/ +img/ +**/__pycache__/ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ddd173b --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +data/ +img/ +**/__pycache__/ +*.swp diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..540062d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +FROM gw000/keras:2.0.8-py3-tf-cpu + +RUN pip3 install scikit-image sklearn + +RUN mkdir /data +ADD icenet/ /tmp/icenet +ADD run_model.py /tmp/run_model.py +WORKDIR /tmp + +ENV PYTHONUNBUFFERED=1 + +ENTRYPOINT ["python3", "run_model.py"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..8e55743 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +# Ship vs iceberg discriminator + +TL;DR: Discriminate between ships and icebergs from SAR imagery. + + +## Approach + +Data augmentation and parameter sharing. CNN and ResNets. + + +## Data directory + +The data directory is expected to have the structure as shown below: + + data/ + ├── params + │   ├── base_cnn-scaling.pkl + │   ├── base_cnn-weights-loss.h5 + │   ├── base_cnn-weights-val_loss.h5 + │   ├── icenet-weights-loss.h5 + │   └── icenet-weights-val_loss.h5 + ├── predictions + │   └── icenet-dev.csv + ├── sample_submission.csv + ├── test.json + └── train.json + +where `{train,test}.json` is the data from the +[kaggle website](https://www.kaggle.com/c/statoil-iceberg-classifier-challenge). + + +## Log + +### Residual base CNN + +Summary: + + * Test loss: 0.5099 + * Test accuracy: 0.7932 + * Epochs: 100 + * Best val loss at epoch 70 (converged until 100, did not overfit) + +Comments: + + * Low variance -- training loss is consistently a bit lower than validation + loss. + * Since images are "artificially" labeled, it is hard to say what the bias is. + There should be some bias since this network does not overfit, and it also + looks like training converges after 100 epochs (with decaying learning rate). + * There may also be labeling noise. It is indeed suspicious that the validation + loss converges with very low variance. Perhaps revisit the labeling + approach for the base generator. 
+ * Conclusion: Check labeling, then bring out the big guns and expand the + residual net. + +With this model as a basis for the 9 regions, followed by a reshape, conv and +two dense layers, yields ok performance: Around 0.20 loss after few epochs. + +However, validation loss is often lower than training loss. It might be that +the two distributions are not the same for both networks -- check the random +seed and verify! Might also be noisy training (because of augmentation). + diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..3b04cfb --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.2 diff --git a/icenet/__init__.py b/icenet/__init__.py new file mode 100644 index 0000000..e986b27 --- /dev/null +++ b/icenet/__init__.py @@ -0,0 +1,12 @@ + +try: + from icenet import base_cnn, icenet +except Exception as ex: + print("Failed to import keras") + print(ex) + base_cnn = icenet = None + +MODELS = { + 'base_cnn': base_cnn, + 'icenet': icenet +} diff --git a/icenet/base_cnn.py b/icenet/base_cnn.py new file mode 100644 index 0000000..b36fd08 --- /dev/null +++ b/icenet/base_cnn.py @@ -0,0 +1,280 @@ +"""Base CNN.""" + +import random +import numpy as np +from itertools import islice +from icenet import util +from icenet import residual +from icenet import resnet + +from keras.models import Model +from keras.initializers import glorot_uniform +from keras.optimizers import Adam, SGD +from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau +from keras.models import load_model +from keras.layers import ( + Input, Add, Dense, Activation, Conv2D, BatchNormalization, Flatten, + MaxPooling2D, AveragePooling2D, Dropout, Concatenate, ZeroPadding2D +) + + +MODEL_NAME = 'base_cnn' + + +def get_model_simple(img_shape): + X_img = Input(img_shape) + X = ZeroPadding2D((2, 2))(X_img) + + # Conv 1 + X = Conv2D(64, + kernel_size=(5, 5), + strides=(1, 1), + padding='valid', + activation='relu', + kernel_initializer=glorot_uniform(seed=0), + name='conv1')(X) + X = Conv2D(128, + kernel_size=(3, 3), + strides=(1, 1), + padding='valid', + activation='relu', + kernel_initializer=glorot_uniform(seed=0), + name='conv2')(X) + X = MaxPooling2D((3, 3), strides=(2, 2))(X) + X = Dropout(0.2)(X) + + X = Conv2D(128, + kernel_size=(3, 3), + strides=(1, 1), + padding='valid', + activation='relu', + kernel_initializer=glorot_uniform(seed=0), + name='conv3')(X) + X = MaxPooling2D((2, 2), strides=(2, 2))(X) + X = Dropout(0.2)(X) + + X = Conv2D(64, + kernel_size=(3, 3), + strides=(1, 1), + padding='valid', + activation='relu', + kernel_initializer=glorot_uniform(seed=0), + name='conv4')(X) + X = MaxPooling2D((3, 3), strides=(1, 1))(X) + X = Dropout(0.2)(X) + + X = Flatten()(X) + X = Dense(512, + activation='relu', + kernel_initializer=glorot_uniform(seed=0), + name='fc1')(X) + X = Dropout(0.2)(X) + + X = Dense(256, + activation='relu', + kernel_initializer=glorot_uniform(seed=0), + name='fc2')(X) + X = Dropout(0.2)(X) + + X = Dense(3, + activation='sigmoid', + kernel_initializer=glorot_uniform(seed=0), + name='y_hat')(X) + + return Model(inputs=X_img, outputs=X, name=MODEL_NAME) + + +def get_model_residual(img_shape): + X_img = Input(img_shape) + + X = ZeroPadding2D((2, 2))(X_img) + + # Conv 1 + X = Conv2D(32, + kernel_size=(5, 5), + strides=(1, 1), + padding='valid', + kernel_initializer=glorot_uniform(seed=0), + name='conv1')(X) + X = BatchNormalization(axis=3, name='bn_conv1')(X) + X = Activation('relu')(X) + X = MaxPooling2D((2, 2), strides=(2, 2))(X) + + # Conv 2 (residual) + X = 
residual.convolutional_block(X, 4, (32, 32, 128), 'stage2a', s=1) + X = residual.identity_block(X, 4, (32, 32, 128), 'stage2b') + #X = residual.identity_block(X, 4, (32, 32, 128), 'stage2c') + + # Conv 3 (residual) + X = residual.convolutional_block(X, 3, (64, 64, 256), 'stage3a', s=2) + X = residual.identity_block(X, 3, (64, 64, 256), 'stage3b') + X = residual.identity_block(X, 3, (64, 64, 256), 'stage3c') + X = residual.identity_block(X, 3, (64, 64, 256), 'stage3d') + + # Conv 4 (residual) + X = residual.convolutional_block(X, 3, (128, 128, 512), 'stage4a', s=2) + X = residual.identity_block(X, 3, (128, 128, 512), 'stage4b') + X = residual.identity_block(X, 3, (128, 128, 512), 'stage4c') + #X = residual.identity_block(X, 3, (128, 128, 512), 'stage4d') + #X = residual.identity_block(X, 3, (128, 128, 512), 'stage4e') + + #X = AveragePooling2D(pool_size=(4, 4), name='avg_pool')(X) + X = Conv2D(512, + kernel_size=(4, 4), + strides=(1, 1), + padding='valid', + kernel_initializer=glorot_uniform(seed=0), + name='convend')(X) + X = BatchNormalization(axis=3, name='bn_convend')(X) + X = Activation('relu')(X) + + # Flatten + X = Flatten()(X) + + X = Dense(3, + activation='softmax', + kernel_initializer=glorot_uniform(seed=0), + name='y_hat')(X) + + return Model(inputs=X_img, outputs=X, name=MODEL_NAME) + + +def get_model_res18(img_shape): + X_img = Input(img_shape) + + X = ZeroPadding2D((2, 2))(X_img) + + # Conv 1 + X = Conv2D(64, + kernel_size=(5, 5), + strides=(1, 1), + padding='valid', + kernel_initializer=glorot_uniform(seed=0), + name='conv1')(X) + X = BatchNormalization(axis=3, name='bn_conv1')(X) + X = Activation('relu')(X) + + # Conv 2 (residual) + X = residual.basic_block(X, 3, (64, 64), 'stage2a') + X = residual.basic_block(X, 3, (64, 64), 'stage2b') + + # Conv 3 (residual) + X = residual.basic_block(X, 3, (128, 128), 'stage3a') + X = residual.basic_block(X, 3, (128, 128), 'stage3b') + + # Conv 4 (residual) + X = residual.basic_block(X, 3, (256, 256), 'stage4a') + X = residual.basic_block(X, 3, (256, 256), 'stage4b') + + # Conv 5 (residual) + X = residual.basic_block(X, 3, (512, 512), 'stage5a') + X = residual.basic_block(X, 3, (512, 512), 'stage5b') + + # AveragePool + X = AveragePooling2D(pool_size=(2, 2), name='avg_pool')(X) + + # Flatten + X = Flatten()(X) + + X = Dense(3, + activation='softmax', + kernel_initializer=glorot_uniform(seed=0), + name='y_hat')(X) + + return Model(inputs=X_img, outputs=X, name=MODEL_NAME) + + +def train(datadir): + print("Load samples from train.json ...") + samples = util.load_samples(datadir, 'train.json') + m_tot = len(samples) + print("Got %d samples" % m_tot) + + # Split the samples with the same seed every time so that the dev set + # is "frozen". This makes it easier to monitor and compare different models. + # When a good model is found, the dev set can then be removed entirely + # in order to fully utilize the available training data. + split = 0.90 + train_samples, dev_samples = util.train_dev_split(samples, split, shuffle=True) + m_train = len(train_samples) + m_dev = len(dev_samples) + print("Split train/test = %.2f" % split) + print("Training samples: %d" % m_train) + print("Dev samples: %d" % m_dev) + print("First 5 dev samples ID's:") + print(' '.join([s['id'] for s in dev_samples[0:5]])) + + # The minimum/maximum values of the entire training set will determine + # the scaling factors. We store the scaling factors to file so that they + # can be retrieved and re-used for predictions. 
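+    # Note (grounded in util.base_cnn_generator / util.icenet_generator below): each band is
+    # rescaled as (x - band_min) / (band_max - band_min) using the training-set factors computed
+    # here, and the same stored factors are reloaded at prediction time so inputs match training.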
+ minmax = util.get_minmax(train_samples) + print("Write scaling to %s-scaling.csv" % MODEL_NAME) + util.save_minmax(datadir, '%s-scaling.pkl' % MODEL_NAME, minmax) + + # Since we make heavy use of augmentation here, we can also augment + # the dev set just to smooth out validation losses during training. + # Augment the dev set x10. + m_dev *= 10 + print("Dev samples (augmented): %d" % m_dev) + dev_generator = util.base_cnn_generator( + dev_samples, minmax, m_dev + ) + X_dev, Y_dev = list(islice(dev_generator, 1))[0] + + # Model + optimization parameters. + #model = get_model_res18((28, 28, 2)) + model = resnet.ResnetBuilder.build_resnet_18((2, 28, 28), 3) + batch_size = 32 + #opt = Adam(lr=0.0002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) + opt = SGD(lr=0.005, momentum=0.9, decay=1e-5) + model.compile( + optimizer=opt, + loss="categorical_crossentropy", + metrics = ["accuracy"] + ) + model.summary() + + # Callbacks + callbacks = [ + ModelCheckpoint( + filepath=util.model_fp(datadir, '%s-weights-val_loss.h5' % MODEL_NAME), + verbose=1, + monitor='val_loss', + save_best_only=True + ), + ModelCheckpoint( + filepath=util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME), + verbose=1, + monitor='loss', + save_best_only=True + ), + ReduceLROnPlateau( + monitor='val_loss', + factor=0.1, + verbose=1, + patience=8, + epsilon=0.005, + mode='min', + min_lr=1e-7 + ), + EarlyStopping( + 'loss', + patience=30, + mode="min" + ) + ] + + # TRAIN! + model.fit_generator( + util.base_cnn_generator(train_samples, minmax, batch_size), + steps_per_epoch=int(4*m_train/batch_size), + epochs=1000, + validation_data=(X_dev, Y_dev), + callbacks=callbacks + ) + + score = model.evaluate(X_dev, Y_dev) + print("") + print("Test loss: %.4f" % score[0]) + print("Test accuracy: %.4f" % score[1]) + diff --git a/icenet/icenet.py b/icenet/icenet.py new file mode 100644 index 0000000..1f0892d --- /dev/null +++ b/icenet/icenet.py @@ -0,0 +1,332 @@ +"""Standard CNN with data augmentation.""" + +import random +import numpy as np +from itertools import islice +from icenet import util + +from keras.models import Model +from keras.initializers import glorot_uniform +from keras.optimizers import Adam +from keras import backend as K +from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler +from keras.models import load_model +from keras.layers import ( + Input, Dense, Activation, Conv2D, BatchNormalization, Flatten, + MaxPooling2D, Dropout, Concatenate, Reshape +) + + +MODEL_NAME = 'icenet' + + +def get_base(base_model_filepath, postfix, last_layer, freeze=True): + base_model = load_model(base_model_filepath) + for i,l in enumerate(base_model.layers[0:last_layer+1]): + l.trainable = not (i == 0 or freeze) + l.name = '%s_%s' % (l.name, postfix) + #base_model.layers[last_layer].trainable = True # Should be avrg pool layer + return base_model.input, base_model.layers[last_layer].output + + +def get_model(base_model_filepath): + + print("Load base models ...") + X_inputs, X_base_outputs = list(zip(*[ + get_base(base_model_filepath, 'sec%d' % i, -5, freeze=True) for i in range(9) + ])) + print("Output layer:") + print(X_base_outputs[0]) + + #X = Concatenate(axis=2)(list(X_base_outputs)) + #X = Reshape((nw*3, nh*3, nc))(X) + #_, n = K.int_shape(X_base_outputs[0]) + _, nw, nh, nc = K.int_shape(X_base_outputs[0]) + assert nw == 1 and nh == 1 and nc == 512 + X = Concatenate(axis=-1)(list(X_base_outputs)) + X = Reshape((3, 3, nc))(X) + + X = Conv2D(1024, + kernel_size=(3, 3), + strides=(1, 1), + 
padding='valid', + activation='relu', + kernel_initializer=glorot_uniform(seed=0), + name='conv_top')(X) + #X = MaxPooling2D((3, 3))(X) + + X = Flatten()(X) + X = Dropout(0.25)(X) + X = Dense(1024, + activation='relu', + kernel_initializer=glorot_uniform(seed=0), + name='fc1')(X) + X = Dropout(0.25)(X) + + X = Dense(1, + activation='sigmoid', + kernel_initializer=glorot_uniform(seed=0), + name='y_hat')(X) + + return Model(inputs=list(X_inputs), outputs=X, name=MODEL_NAME) + + +def train(datadir): + base_model_name = 'base_cnn' + print("Load scaling from %s-scaling.csv" % base_model_name) + minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name) + + print("Load samples from train.json ...") + samples = util.load_samples(datadir, 'train.json') + m_tot = len(samples) + print("Got %d samples" % m_tot) + + split = 0.90 + train_samples, dev_samples = util.train_dev_split(samples, split) + m_train = len(train_samples) + m_dev = len(dev_samples) + print("Split train/test = %.2f" % split) + print("Training samples: %d" % m_train) + print("Dev samples: %d" % m_dev) + print("First 5 dev samples ID's:") + print(' '.join([s['id'] for s in dev_samples[0:5]])) + + # Extract dev_samples + dev_generator = util.icenet_generator( + dev_samples, minmax, m_dev, crop_offset=3, + augment=False + ) + X_dev, Y_dev = list(islice(dev_generator, 1))[0] + + # Model + opt + def lr_schedule(epoch): + if epoch < 20: + return 0.0005 + elif epoch < 50: + return 0.0002 + elif epoch < 200: + return 0.00005 + else: + return 0.00001 + model = get_model(util.model_fp(datadir, '%s-weights-val_loss.h5' % base_model_name)) + batch_size = 16 + opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) + model.compile( + optimizer=opt, + loss="binary_crossentropy", + metrics = ["accuracy"] + ) + _summary = [] + model.summary(print_fn=lambda x: _summary.append(x)) + print('\n'.join(_summary[0:8])) + print("...") + print("...") + print("...") + print("...") + print('\n'.join(_summary[-40:])) + + # Callbacks + callbacks = [ + ModelCheckpoint( + filepath=util.model_fp(datadir, '%s-weights-val_loss.h5' % MODEL_NAME), + verbose=1, + monitor='val_loss', + save_best_only=True + ), + ModelCheckpoint( + filepath=util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME), + verbose=1, + monitor='loss', + save_best_only=True + ), + LearningRateScheduler(lr_schedule), + EarlyStopping( + 'loss', + patience=50, + mode="min" + ) + ] + + # TRAIN! 
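+    # With steps_per_epoch = 2*m_train/batch_size below, one "epoch" draws roughly two
+    # augmented crops of every training sample from the generator.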
+ model.fit_generator( + util.icenet_generator(train_samples, minmax, batch_size, crop_offset=3), + steps_per_epoch=int(2*m_train/batch_size), + epochs=1000, + validation_data=(X_dev, Y_dev), + callbacks=callbacks + ) + + score = model.evaluate(X_dev, Y_dev) + print("") + print("Test loss: %.4f" % score[0]) + print("Test accuracy: %.4f" % score[1]) + + print("Make dev predictions ...") + y_pred = model.predict(X_dev, batch_size=32) + print("Write to %s-dev.csv" % MODEL_NAME) + util.write_preds(datadir, '%s-dev.csv' % MODEL_NAME, dev_samples, y_pred) + + n_test = 30 + print("Check invariance on %d random augmented dev samples" % n_test) + for i in range(n_test): + test_sample = random.choice(dev_samples) + dev_generator = util.icenet_generator( + [test_sample], minmax, 6*6*4*2, crop_offset=3, + augment=True + ) + X_dev, Y_dev = list(islice(dev_generator, 1))[0] + y_pred = model.predict(X_dev, batch_size=32) + print("Dev sample %s (is_iceberg=%s): Mean = %.4f, std = %.4f, min = %.4f, max = %.4f" % ( + test_sample.get('id'), test_sample.get('is_iceberg'), + np.mean(y_pred), np.std(y_pred), + np.min(y_pred), np.max(y_pred) + )) + + + +def predict(datadir): + print("Load model from %s-weights-loss.h5 ..." % MODEL_NAME) + model = load_model(util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME)) + + # Load scaling factors + base_model_name = 'base_cnn' + print("Load scaling from %s-scaling.pkl" % base_model_name) + minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name) + + target_fp = 'train.json' + print("Load samples from %s..." % target_fp) + samples = util.load_samples(datadir, target_fp) + m_tot = len(samples) + print("Got %d samples" % m_tot) + + # Extract samples with generator + data_gen = util.icenet_generator( + samples, minmax, m_tot, crop_offset=3, + augment=False + ) + X, _= list(islice(data_gen, 1))[0] + print("X (image) shape:") + print(X[0].shape) + + # Predict ... + print("Predict!") + y_pred = model.predict(X, batch_size=32) + + filename = '%s-%s.csv' % (MODEL_NAME, target_fp.split('.')[0]) + print("Write to %s" % filename) + util.write_preds(datadir, filename, samples, y_pred) + + n_test = 20 + print("Check invariance on %d random augmented dev samples" % n_test) + for i in range(n_test): + test_sample = random.choice(samples) + dev_generator = util.icenet_generator( + [test_sample], minmax, 6*6*4*2, crop_offset=3, + augment=True + ) + X_dev, Y_dev = list(islice(dev_generator, 1))[0] + y_pred = model.predict(X_dev, batch_size=32) + print("Dev sample %s (is_iceberg=%s): Mean = %.4f, std = %.4f, min = %.4f, max = %.4f" % ( + test_sample.get('id'), test_sample.get('is_iceberg'), + np.mean(y_pred), np.std(y_pred), + np.min(y_pred), np.max(y_pred) + )) + + +def _sigmoid(z): + return 1.0 / (1 + np.exp(-z)) + + +def stretch(z, factor=10): + return _sigmoid(factor*(z-0.5)) + + +def predict_avrg(datadir): + print("Load model from %s-weights-loss.h5 ..." % MODEL_NAME) + model = load_model(util.model_fp(datadir, '%s-weights-loss.h5' % MODEL_NAME)) + + # Load scaling factors + base_model_name = 'base_cnn' + print("Load scaling from %s-scaling.pkl" % base_model_name) + minmax = util.load_minmax(datadir, '%s-scaling.pkl' % base_model_name) + + target_fp = 'train.json' + print("Load samples from %s..." 
% target_fp) + samples = util.load_samples(datadir, target_fp) + random.shuffle(samples) + samples = samples[0:400] + random.shuffle(samples) + m_tot = len(samples) + print("Got %d samples" % m_tot) + + n_test = 6*6*4*2 + y_pred_first = np.zeros((len(samples), 1)) + y_pred_avrg = np.zeros((len(samples), 1)) + y_pred_avrg_sig10 = np.zeros((len(samples), 1)) + y_pred_avrg_sig20 = np.zeros((len(samples), 1)) + y_pred_avrg_sig30 = np.zeros((len(samples), 1)) + y_pred_avrg_sig40 = np.zeros((len(samples), 1)) + y_pred_avrg_sig50 = np.zeros((len(samples), 1)) + y_reals = np.zeros((len(samples), 1)) + print("Average each sample over %d augmented versions" % n_test) + for i,s in enumerate(samples): + dev_generator = util.icenet_generator( + [s], minmax, n_test, crop_offset=3, + augment=True + ) + X_dev, Y_dev = list(islice(dev_generator, 1))[0] + y_pred = model.predict(X_dev, batch_size=n_test) + + y_pred_first[i,0] = y_pred[0,0] + y_pred_avrg[i,0] = np.mean(y_pred) + y_pred_avrg_sig10[i,0] = stretch(y_pred_avrg[i,0], factor=10) + y_pred_avrg_sig20[i,0] = stretch(y_pred_avrg[i,0], factor=11) + y_pred_avrg_sig30[i,0] = stretch(y_pred_avrg[i,0], factor=12) + y_pred_avrg_sig40[i,0] = stretch(y_pred_avrg[i,0], factor=13) + y_pred_avrg_sig50[i,0] = stretch(y_pred_avrg[i,0], factor=14) + y_reals[i,0] = s.get('is_iceberg', 0) * 1.0 + + print("Sample %d: %s (iceberg=%s): Mean = %.4f, s(10) = %.4f, s(40) = %.4f std = %.4f, min = %.4f, max = %.4f" % ( + i, s.get('id'), s.get('is_iceberg'), + np.mean(y_pred), + y_pred_avrg_sig10[i,0], y_pred_avrg_sig40[i,0], + np.std(y_pred), + np.min(y_pred), np.max(y_pred) + )) + + print("'First' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_first)) + print("'Avrg' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg)) + print("'Avrg stretch(10)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig10)) + print("'Avrg stretch(20)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig20)) + print("'Avrg stretch(30)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig30)) + print("'Avrg stretch(40)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig40)) + print("'Avrg stretch(50)' loss: %.4f" % util.binary_crossentropy(y_reals, y_pred_avrg_sig50)) + + filename = '%s-%s-avrg.csv' % (MODEL_NAME, target_fp.split('.')[0]) + print("Write to %s" % filename) + util.write_preds(datadir, filename, samples, y_pred_avrg) + + filename = '%s-%s-fst.csv' % (MODEL_NAME, target_fp.split('.')[0]) + print("Write to %s" % filename) + util.write_preds(datadir, filename, samples, y_pred_first) + + filename = '%s-%s-s10.csv' % (MODEL_NAME, target_fp.split('.')[0]) + print("Write to %s" % filename) + util.write_preds(datadir, filename, samples, y_pred_avrg_sig10) + + filename = '%s-%s-s20.csv' % (MODEL_NAME, target_fp.split('.')[0]) + print("Write to %s" % filename) + util.write_preds(datadir, filename, samples, y_pred_avrg_sig20) + + filename = '%s-%s-s30.csv' % (MODEL_NAME, target_fp.split('.')[0]) + print("Write to %s" % filename) + util.write_preds(datadir, filename, samples, y_pred_avrg_sig30) + + filename = '%s-%s-s40.csv' % (MODEL_NAME, target_fp.split('.')[0]) + print("Write to %s" % filename) + util.write_preds(datadir, filename, samples, y_pred_avrg_sig40) + + filename = '%s-%s-s50.csv' % (MODEL_NAME, target_fp.split('.')[0]) + print("Write to %s" % filename) + util.write_preds(datadir, filename, samples, y_pred_avrg_sig50) + diff --git a/icenet/residual.py b/icenet/residual.py new file mode 100644 index 0000000..8842a2f --- 
/dev/null +++ b/icenet/residual.py @@ -0,0 +1,190 @@ +"""Residual blocks: Conv and identity""" + +from keras.initializers import glorot_uniform +from keras.layers import Add, Activation, Conv2D, BatchNormalization + + +def basic_block(X, f, filters, s, label): + """ + Residual net identity block. + + Arguments: + X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev) + f -- integer, specifying the shape of the middle CONV's window for the main path + filters -- python list of integers, defining the number of filters in the CONV layers of the main path + label -- label to use for naming + s -- strides + + Returns: + X -- output of the identity block, tensor of shape (n_H, n_W, n_C) + """ + F1, F2 = filters + X_shortcut = X + + # First component of main path + X = Conv2D( + F1, + kernel_size=(f, f), + strides=(s, s), + padding='same', + name='conv_%s_2a' % label, + kernel_initializer=glorot_uniform(seed=0) + )(X) + X = BatchNormalization(axis=3, name='bn_%s_2a' % label)(X) + X = Activation('relu')(X) + + # Second component of main path + X = Conv2D( + F2, + kernel_size=(f, f), + strides=(1, 1), + padding='same', + name='conv_%s_2b' % label, + kernel_initializer=glorot_uniform(seed=0) + )(X) + X = BatchNormalization(axis=3, name='bn_%s_2b' % label)(X) + + # Shortcut path + X_shortcut = Conv2D( + F2, + kernel_size=(1, 1), + strides=(s, s), + padding='valid', + name='conv_%s_1' % label, + kernel_initializer=glorot_uniform(seed=0) + )(X_shortcut) + X_shortcut = BatchNormalization(axis=3, name='bn_%s_1' % label)(X_shortcut) + + # Add shortcut value to main path, and pass it through a RELU activation + X = Add()([X_shortcut, X]) + X = Activation('relu')(X) + + return X + + +def identity_block(X, f, filters, label): + """ + Residual net identity block. + + Arguments: + X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev) + f -- integer, specifying the shape of the middle CONV's window for the main path + filters -- python list of integers, defining the number of filters in the CONV layers of the main path + label -- label to use for naming + + Returns: + X -- output of the identity block, tensor of shape (n_H, n_W, n_C) + """ + F1, F2, F3 = filters + X_shortcut = X + + # First component of main path + X = Conv2D( + F1, + kernel_size=(1, 1), + strides=(1,1), + padding='valid', + name='conv_%s_2a' % label, + kernel_initializer=glorot_uniform(seed=0) + )(X) + X = BatchNormalization(axis=3, name='bn_%s_2a' % label)(X) + X = Activation('relu')(X) + + # Second component of main path + X = Conv2D( + F2, + kernel_size=(f, f), + strides=(1, 1), + padding='same', + name='conv_%s_2b' % label, + kernel_initializer=glorot_uniform(seed=0) + )(X) + X = BatchNormalization(axis=3, name='bn_%s_2b' % label)(X) + X = Activation('relu')(X) + + # Third component of main path + X = Conv2D( + F3, + kernel_size=(1, 1), + strides=(1, 1), + padding='valid', + name='conv_%s_2c' % label, + kernel_initializer=glorot_uniform(seed=0) + )(X) + X = BatchNormalization(axis=3, name='bn_%s_2c' % label)(X) + + # Add shortcut value to main path, and pass it through a RELU activation + X = Add()([X_shortcut, X]) + X = Activation('relu')(X) + + return X + + +def convolutional_block(X, f, filters, label, s=2): + """ + Residual net convolutional block. 
+ + Arguments: + X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev) + f -- integer, specifying the shape of the middle CONV's window for the main path + filters -- python list of integers, defining the number of filters in the CONV layers of the main path + label -- label to use for naming + s -- Integer, specifying the stride to be used + + Returns: + X -- output of the convolutional block, tensor of shape (n_H, n_W, n_C) + """ + F1, F2, F3 = filters + X_shortcut = X + + # First component of main path + X = Conv2D( + F1, + kernel_size=(1, 1), + strides=(s,s), + padding='valid', + name='conv_%s_2a' % label, + kernel_initializer=glorot_uniform(seed=0) + )(X) + X = BatchNormalization(axis=3, name='bn_%s_2a' % label)(X) + X = Activation('relu')(X) + + # Second component of main path + X = Conv2D( + F2, + kernel_size=(f, f), + strides=(1, 1), + padding='same', + name='conv_%s_2b' % label, + kernel_initializer=glorot_uniform(seed=0) + )(X) + X = BatchNormalization(axis=3, name='bn_%s_2b' % label)(X) + X = Activation('relu')(X) + + # Third component of main path + X = Conv2D( + F3, + kernel_size=(1, 1), + strides=(1, 1), + padding='valid', + name='conv_%s_2c' % label, + kernel_initializer=glorot_uniform(seed=0) + )(X) + X = BatchNormalization(axis=3, name='bn_%s_2c' % label)(X) + + # Shortcut path + X_shortcut = Conv2D( + F3, + kernel_size=(1, 1), + strides=(s, s), + padding='valid', + name='conv_%s_1' % label, + kernel_initializer=glorot_uniform(seed=0) + )(X_shortcut) + X_shortcut = BatchNormalization(axis=3, name='bn_%s_1' % label)(X_shortcut) + + # Add shortcut value to main path, and pass it through a RELU activation + X = Add()([X_shortcut, X]) + X = Activation('relu')(X) + + return X diff --git a/icenet/resnet.py b/icenet/resnet.py new file mode 100644 index 0000000..8c3f9ed --- /dev/null +++ b/icenet/resnet.py @@ -0,0 +1,252 @@ +from __future__ import division + +import six +from keras.models import Model +from keras.layers import ( + Input, + Activation, + Dense, + Flatten +) +from keras.layers.convolutional import ( + Conv2D, + MaxPooling2D, + AveragePooling2D +) +from keras.layers.merge import add +from keras.layers.normalization import BatchNormalization +from keras.regularizers import l2 +from keras import backend as K + + +def _bn_relu(input): + """Helper to build a BN -> relu block + """ + norm = BatchNormalization(axis=CHANNEL_AXIS)(input) + return Activation("relu")(norm) + + +def _conv_bn_relu(**conv_params): + """Helper to build a conv -> BN -> relu block + """ + filters = conv_params["filters"] + kernel_size = conv_params["kernel_size"] + strides = conv_params.setdefault("strides", (1, 1)) + kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal") + padding = conv_params.setdefault("padding", "same") + kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4)) + + def f(input): + conv = Conv2D(filters=filters, kernel_size=kernel_size, + strides=strides, padding=padding, + kernel_initializer=kernel_initializer, + kernel_regularizer=kernel_regularizer)(input) + return _bn_relu(conv) + + return f + + +def _bn_relu_conv(**conv_params): + """Helper to build a BN -> relu -> conv block. 
+ This is an improved scheme proposed in http://arxiv.org/pdf/1603.05027v2.pdf + """ + filters = conv_params["filters"] + kernel_size = conv_params["kernel_size"] + strides = conv_params.setdefault("strides", (1, 1)) + kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal") + padding = conv_params.setdefault("padding", "same") + kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4)) + + def f(input): + activation = _bn_relu(input) + return Conv2D(filters=filters, kernel_size=kernel_size, + strides=strides, padding=padding, + kernel_initializer=kernel_initializer, + kernel_regularizer=kernel_regularizer)(activation) + + return f + + +def _shortcut(input, residual): + """Adds a shortcut between input and residual block and merges them with "sum" + """ + # Expand channels of shortcut to match residual. + # Stride appropriately to match residual (width, height) + # Should be int if network architecture is correctly configured. + input_shape = K.int_shape(input) + residual_shape = K.int_shape(residual) + stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS])) + stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS])) + equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS] + + shortcut = input + # 1 X 1 conv if shape is different. Else identity. + if stride_width > 1 or stride_height > 1 or not equal_channels: + shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS], + kernel_size=(1, 1), + strides=(stride_width, stride_height), + padding="valid", + kernel_initializer="he_normal", + kernel_regularizer=l2(0.0001))(input) + + return add([shortcut, residual]) + + +def _residual_block(block_function, filters, repetitions, is_first_layer=False): + """Builds a residual block with repeating bottleneck blocks. + """ + def f(input): + for i in range(repetitions): + init_strides = (1, 1) + if i == 0 and not is_first_layer: + init_strides = (2, 2) + input = block_function(filters=filters, init_strides=init_strides, + is_first_block_of_first_layer=(is_first_layer and i == 0))(input) + return input + + return f + + +def basic_block(filters, init_strides=(1, 1), is_first_block_of_first_layer=False): + """Basic 3 X 3 convolution blocks for use on resnets with layers <= 34. + Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf + """ + def f(input): + + if is_first_block_of_first_layer: + # don't repeat bn->relu since we just did bn->relu->maxpool + conv1 = Conv2D(filters=filters, kernel_size=(3, 3), + strides=init_strides, + padding="same", + kernel_initializer="he_normal", + kernel_regularizer=l2(1e-4))(input) + else: + conv1 = _bn_relu_conv(filters=filters, kernel_size=(3, 3), + strides=init_strides)(input) + + residual = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(conv1) + return _shortcut(input, residual) + + return f + + +def bottleneck(filters, init_strides=(1, 1), is_first_block_of_first_layer=False): + """Bottleneck architecture for > 34 layer resnet. 
+ Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf + + Returns: + A final conv layer of filters * 4 + """ + def f(input): + + if is_first_block_of_first_layer: + # don't repeat bn->relu since we just did bn->relu->maxpool + conv_1_1 = Conv2D(filters=filters, kernel_size=(1, 1), + strides=init_strides, + padding="same", + kernel_initializer="he_normal", + kernel_regularizer=l2(1e-4))(input) + else: + conv_1_1 = _bn_relu_conv(filters=filters, kernel_size=(1, 1), + strides=init_strides)(input) + + conv_3_3 = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(conv_1_1) + residual = _bn_relu_conv(filters=filters * 4, kernel_size=(1, 1))(conv_3_3) + return _shortcut(input, residual) + + return f + + +def _handle_dim_ordering(): + global ROW_AXIS + global COL_AXIS + global CHANNEL_AXIS + if K.image_dim_ordering() == 'tf': + ROW_AXIS = 1 + COL_AXIS = 2 + CHANNEL_AXIS = 3 + else: + CHANNEL_AXIS = 1 + ROW_AXIS = 2 + COL_AXIS = 3 + + +def _get_block(identifier): + if isinstance(identifier, six.string_types): + res = globals().get(identifier) + if not res: + raise ValueError('Invalid {}'.format(identifier)) + return res + return identifier + + +class ResnetBuilder(object): + @staticmethod + def build(input_shape, num_outputs, block_fn, repetitions): + """Builds a custom ResNet like architecture. + + Args: + input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols) + num_outputs: The number of outputs at final softmax layer + block_fn: The block function to use. This is either `basic_block` or `bottleneck`. + The original paper used basic_block for layers < 50 + repetitions: Number of repetitions of various block units. + At each block unit, the number of filters are doubled and the input size is halved + + Returns: + The keras `Model`. + """ + _handle_dim_ordering() + if len(input_shape) != 3: + raise Exception("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)") + + # Permute dimension order if necessary + if K.image_dim_ordering() == 'tf': + input_shape = (input_shape[1], input_shape[2], input_shape[0]) + + # Load function from str if needed. 
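+        # _get_block resolves a string such as 'basic_block' or 'bottleneck' to the
+        # corresponding function via globals(); passing the function itself also works.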
+ block_fn = _get_block(block_fn) + + input = Input(shape=input_shape) + conv1 = _conv_bn_relu(filters=64, kernel_size=(7, 7), strides=(2, 2))(input) + pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(conv1) + + block = pool1 + filters = 64 + for i, r in enumerate(repetitions): + block = _residual_block(block_fn, filters=filters, repetitions=r, is_first_layer=(i == 0))(block) + filters *= 2 + + # Last activation + block = _bn_relu(block) + + # Classifier block + block_shape = K.int_shape(block) + pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS], block_shape[COL_AXIS]), + strides=(1, 1))(block) + flatten1 = Flatten()(pool2) + dense = Dense(units=num_outputs, kernel_initializer="he_normal", + activation="softmax")(flatten1) + + model = Model(inputs=input, outputs=dense) + return model + + @staticmethod + def build_resnet_18(input_shape, num_outputs): + return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2, 2, 2]) + + @staticmethod + def build_resnet_34(input_shape, num_outputs): + return ResnetBuilder.build(input_shape, num_outputs, basic_block, [3, 4, 6, 3]) + + @staticmethod + def build_resnet_50(input_shape, num_outputs): + return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 6, 3]) + + @staticmethod + def build_resnet_101(input_shape, num_outputs): + return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 23, 3]) + + @staticmethod + def build_resnet_152(input_shape, num_outputs): + return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 8, 36, 3]) diff --git a/icenet/util.py b/icenet/util.py new file mode 100644 index 0000000..614b2cc --- /dev/null +++ b/icenet/util.py @@ -0,0 +1,229 @@ +import os +import json +import numpy as np +import random + +from itertools import cycle +from skimage import transform +from sklearn.externals import joblib +from sklearn.metrics import log_loss + + +def binary_crossentropy(y_true, y_pred): + return log_loss(y_true.flatten(), y_pred.flatten()) + + +def get_bands(band1, band2): + return np.dstack(( + np.array(band1).reshape(75, 75), + np.array(band2).reshape(75, 75) + )) + + +def get_angle(a): + if a == 'na': + return None + else: + return a + + +def get_label(l): + if l is None: + return l + else: + return int(l) + + +def unpack(samples): + images, angles, labels = zip(*[ + (get_bands(s['band_1'], s['band_2']), + get_angle(s['inc_angle']), + get_label(s.get('is_iceberg'))) + for s in samples + ]) + return images, angles, labels + + +def load_samples(datadir, filename): + with open(os.path.join(datadir, filename)) as f: + samples = json.load(f) + return samples + + +def train_dev_split(samples, split, shuffle=True): + split_idx = int(split*len(samples)) + if shuffle: + random.seed(0) + random.shuffle(samples) + return (samples[0:split_idx], samples[split_idx:]) + + +def write_preds(datadir, filename, samples, y_pred): + with open(os.path.join(datadir, 'predictions', filename), 'w') as f: + f.write('id,is_iceberg\n') + f.write('\n'.join([ + '%s,%.6f' % (samples[i].get('id'), y_pred[i][0]) + for i in range(len(samples)) + ])) + f.write('\n') + + +def model_fp(datadir, filename): + return os.path.join(datadir, 'params', filename) + + +def save_minmax(datadir, filename, minmax): + joblib.dump(minmax, os.path.join(datadir, 'params', filename)) + + +def load_minmax(datadir, filename): + return joblib.load(os.path.join(datadir, 'params', filename)) + + +def get_minmax(samples): + # Extract global mins/max' for normalization + images, angles, labels = unpack(samples) 
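+    # Angles may be None (the 'na' samples); the key functions on the angle min/max further
+    # below map None to +/-1e9 so a missing angle is never selected as the extreme value.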
+ b1_min = min([im[:, :, 0].min() for im in images]) + b1_max = max([im[:, :, 0].max() for im in images]) + b2_min = min([im[:, :, 1].min() for im in images]) + b2_max = max([im[:, :, 1].max() for im in images]) + a_min = min(angles, key=lambda x: x or 1e9) + a_max = max(angles, key=lambda x: x or -1e9) + print("Band 1 min/max: %.2f, %.2f" % (b1_min, b1_max)) + print("Band 2 min/max: %.2f, %.2f" % (b2_min, b2_max)) + print("Angles min/max: %.2f, %.2f" % (a_min, a_max)) + return (b1_min, b1_max, b2_min, b2_max, a_min, a_max) + + +def base_cnn_generator(samples, minmax, batch_size, verbose=False): + img_dim = 75 + window = 28 + r = int(window/2) + padding = int(window/np.sqrt(2)) + if verbose: + print("window is %d px" % window) + print("padding is %d px" % padding) + + # Fuck the angles. + images, _, labels = unpack(samples) + b1_min, b1_max, b2_min, b2_max, _, _ = minmax + + # Yield batches forever, cycling the samples with augmentation: + # Random rotation and crop, and random Bernoulli mirroring. + batch_images = [] + batch_labels = [] + for image, label in cycle(zip(images, labels)): + # Scale each band independently + img = np.dstack(( + (image[:, :, 0] - b1_min) / (b1_max - b1_min), + (image[:, :, 1] - b2_min) / (b2_max - b2_min), + )) + + # Save the maximum of both bands within this image + b1_peak = img[:, :, 0].max() + b2_peak = img[:, :, 1].max() + + # Rotate around random midpoint + row_mid = random.randint(padding, img_dim-padding-1) + col_mid = random.randint(padding, img_dim-padding-1) + if verbose: + print("\nSample %d" % (len(batch_labels)+1)) + print("row/col mid: %d/%d" % (row_mid, col_mid)) + print("Mid values: %.2f, %.2f" % (img[row_mid, col_mid, 0], img[row_mid, col_mid, 1])) + + # NOTE: sklearn.transform.rotate behaves incorrectly when using kwargs 'center' + # Clip to mid with padding before rotating. + img = img[row_mid-padding:row_mid+padding, col_mid-padding:col_mid+padding, :] + rot = random.random() * 360 - 180 + img = transform.rotate(img, rot, order=1) + if verbose: + print("Rotation: %.2f deg" % rot) + print("Mid values: %.2f, %.2f" % (img[padding, padding, 0], img[padding, padding, 1])) + img = img[padding-r:padding+r, padding-r:padding+r, :] + + # Mirror + if random.randint(0, 1) and False: + img = np.fliplr(img) + if verbose: + print("Sample was mirrored") + + # Label by checking that subimage contains maximum of either band + # If the subimage contains a max value that is within 95% of the image + # max, consider it to contain the feature. 
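+        # The resulting label l is 0 unless the crop contains (within 95%) the peak of both
+        # bands; otherwise it is 1 for a ship and 2 for an iceberg (label is 0/1 from
+        # is_iceberg), and is then one-hot encoded to match the base CNN's 3-way output.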
+ sub_b1_peak = img[:, :, 0].max() + sub_b2_peak = img[:, :, 1].max() + l = int(sub_b1_peak/b1_peak > 0.95 and + sub_b2_peak/b2_peak > 0.95) * (label + 1) + if verbose: + print("Label is %d" % l) + categorical_label = tuple(int(i==l) for i in range(3)) + + batch_images.append(img) + batch_labels.append(categorical_label) + if len(batch_labels) == batch_size: + yield ( + np.array(batch_images, dtype=np.float32), + np.array(batch_labels, dtype=np.float32) + ) + batch_images = [] + batch_labels = [] + + +def augment_image(img, mirror, rots): + if mirror: + img = np.fliplr(img) + img = np.rot90(img, k=rots) + return img + + +def icenet_generator(samples, minmax, batch_size, crop_offset=3, + augment=True, verbose=False): + img_dim = 75 + mid_x = mid_y = 75.0 / 2 + crop_dim = img_dim - 2*crop_offset + window = 28 + r = int(window/2) + + images, _, labels = unpack(samples) + b1_min, b1_max, b2_min, b2_max, _, _ = minmax + + # Yield batches forever + # A batch consists of an input-output tuple + # ([X_image_sec1, X_image_sec2, ... , X_image_sec9], Y_labels) + # where the input image is split over 9 overlapping sections, starting + # from upper left in row-major order + batch_image_sections = [[] for _ in range(9)] + batch_labels = [] + for image, label in cycle(zip(images, labels)): + # Scale each band independently + img = np.dstack(( + (image[:, :, 0] - b1_min) / (b1_max - b1_min), + (image[:, :, 1] - b2_min) / (b2_max - b2_min), + )) + + # Crop with random offset from midpoint + row_offset = crop_offset * (2*random.random() - 1) + col_offset = crop_offset * (2*random.random() - 1) + if not augment: + row_offset = col_offset = 0 + row = int(mid_y + row_offset - crop_dim/2) + col = int(mid_x + col_offset - crop_dim/2) + img = img[row:row+crop_dim, col:col+crop_dim, :] + + if augment: + img = augment_image(img, random.randint(0, 1), random.randint(0, 3)) + + # Append each section of the image + for i, ridx in enumerate([r, int(mid_y), crop_dim-r]): + for j, cidx in enumerate([r, int(mid_x), crop_dim-r]): + batch_image_sections[i*3+j].append(img[ridx-r:ridx+r, cidx-r:cidx+r, :]) + + batch_labels.append(label) + if len(batch_labels) == batch_size: + yield ( + [np.array(sec_imgs, dtype=np.float32) for sec_imgs in batch_image_sections], + np.array(batch_labels, dtype=np.float32) + ) + batch_image_sections = [[] for _ in range(9)] + batch_labels = [] + diff --git a/plot.py b/plot.py new file mode 100755 index 0000000..5639bd9 --- /dev/null +++ b/plot.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +"""Plot training/test data. Channels appear side by side.""" + +import os +import argparse +import json +import random + +import numpy as np +import matplotlib.pyplot as plt + +from icenet import util + + +def plot_sample(outdir, sample, minmax, interactive=False): + fig, (ax1, ax2) = plt.subplots(1, 2, sharex='col', sharey='row') + fig.suptitle('\n'.join([ + "Id: %s (original)" % sample.get('id'), + "Iceberg? 
%s" % sample.get('is_iceberg', '-'), + "Incident angle: %s" % repr(sample.get('inc_angle'))]) + ) + + b1_min, b1_max, b2_min, b2_max, _, _ = minmax + b1 = (np.array(sample['band_1']).reshape(75, 75) - b1_min) / (b1_max - b1_min) + b2 = (np.array(sample['band_2']).reshape(75, 75) - b2_min) / (b2_max - b2_min) + ax1.imshow(b1, vmin=0.2, vmax=0.8, aspect='auto') + ax1.set_title("HH") + ax2.imshow(b2, vmin=0.2, vmax=0.8, aspect='auto') + ax2.set_title("HV") + + #fig.tight_layout() + fig.subplots_adjust(top=0.80) + if interactive: + plt.show() + else: + fig.savefig(os.path.join(outdir, "%s.png" % sample['id'])) + plt.close('all') + + +def plot_angle_hist(outdir, samples, interactive=False): + angles = [s['inc_angle'] for s in samples if s['inc_angle'] != 'na'] + + hist, bins = np.histogram(angles, bins=50) + width = 0.7 * (bins[1] - bins[0]) + center = (bins[:-1] + bins[1:]) / 2 + + fig = plt.figure() + plt.bar(center, hist, align='center', width=width) + plt.title("%d/%d valid angles" % (len(angles), len(samples))) + if interactive: + plt.show() + else: + fig.savefig(os.path.join(outdir, "angle_hist.png")) + plt.close('all') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '-o', + required=False, + help="Output directory for samples." + ) + parser.add_argument( + '-n', + type=int, + required=False, + help="Number of samples to plot (randomized)" + ) + parser.add_argument( + '-i', + required=False, + action='store_true', + help="Show plots (and don't store)" + ) + parser.add_argument( + 'samples_file', + help="JSON file with samples" + ) + args = parser.parse_args() + + outdir = args.o or '.' + if not os.path.isdir(outdir): + raise Exception("Output directory does not exist") + + print("Loading samples ...") + with open(args.samples_file) as f: + samples = json.load(f) + print("%d samples in set" % len(samples)) + + if args.n: + print("Pick %d random samples ..." % args.n) + random.shuffle(samples) + samples = samples[0:args.n] + + minmax = util.get_minmax(samples) + for i, s in enumerate(samples): + print("Plotting sample %d/%d" % (i+1, len(samples))) + plot_sample(outdir, s, minmax, interactive=args.i or False) + + print("Plot angle histogram ..") + plot_angle_hist(outdir, samples, interactive=args.i or False) + + print("Done.") diff --git a/plot_augmented.py b/plot_augmented.py new file mode 100755 index 0000000..2528290 --- /dev/null +++ b/plot_augmented.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +"""Plot image produced by data generators in icenet/utils.py""" + +import os +import argparse +import json +import random + +import numpy as np +import matplotlib.pyplot as plt + +from icenet import util +from plot import plot_sample +from itertools import islice + + +def plot_base_cnn_samples(outdir, samples, interactive=False): + m = len(samples) + minmax = util.get_minmax(samples) + imgs, _ = list(islice(util.base_cnn_generator( + samples, minmax, m, shuffle=False, verbose=True + ), 1))[0] + for i in range(m): + print("Plotting sample %d/%d" % (i+1, m)) + sample = samples[i] + img = imgs[i, :, :, :] + + fig, (ax1, ax2) = plt.subplots(1, 2, sharex='col', sharey='row') + fig.suptitle('\n'.join([ + "Id: %s (rotated and cropped)" % sample.get('id'), + "Iceberg? 
%s" % sample.get('is_iceberg', '-'), + "Incident angle: %s" % repr(sample.get('inc_angle'))]) + ) + + ax1.imshow(img[:, :, 0], vmin=0.2, vmax=0.8, aspect='auto') + ax1.set_title("HH") + ax2.imshow(img[:, :, 1], vmin=0.2, vmax=0.8, aspect='auto') + ax2.set_title("HV") + + #fig.tight_layout() + fig.subplots_adjust(top=0.80) + plot_sample(outdir, sample, minmax, interactive=interactive) + if not interactive: + fig.savefig(os.path.join(outdir, "%s_base.png" % sample['id'])) + plt.close('all') + + +def plot_icenet_samples(outdir, samples, interactive=False): + m = len(samples) + minmax = util.get_minmax(samples) + sections, y = list(islice(util.icenet_generator( + samples, minmax, m, augment=False, verbose=True + ), 1))[0] + for i in range(m): + print("Plotting sample %d/%d" % (i+1, m)) + sample = samples[i] + + for b in range(2): + fig, axes = plt.subplots(3, 3, sharex='col', sharey='row') + fig.suptitle('\n'.join([ + "Id: %s (rotated and cropped, band %d)" % (sample.get('id'), b+1), + "Iceberg? %s" % sample.get('is_iceberg', '-'), + "Incident angle: %s" % repr(sample.get('inc_angle'))]) + ) + + for ridx in range(3): + for cidx in range(3): + axes[ridx, cidx].imshow( + sections[ridx*3+cidx][i, :, :, b], + vmin=0.2, + vmax=0.8, + aspect='auto' + ) + + fig.subplots_adjust(top=0.80) + if not interactive: + fig.savefig(os.path.join(outdir, "%s_icenet_b%d.png" % (sample['id'], b))) + + plot_sample(outdir, sample, minmax, interactive=interactive) + plt.close('all') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '-o', + required=False, + help="Output directory for samples." + ) + parser.add_argument( + '-g', + required=True, + choices=['base', 'icenet'], + help="Generator to use (base|icenet)." + ) + parser.add_argument( + '-n', + type=int, + required=False, + help="Number of samples to plot (randomized)" + ) + parser.add_argument( + '-i', + required=False, + action='store_true', + help="Show plots (and don't store)" + ) + parser.add_argument( + 'samples_file', + help="JSON file with samples" + ) + args = parser.parse_args() + + outdir = args.o or '.' + if not os.path.isdir(outdir): + raise Exception("Output directory does not exist") + + print("Loading samples ...") + with open(args.samples_file) as f: + samples = json.load(f) + print("%d samples in set" % len(samples)) + + if args.n: + print("Pick %d random samples ..." % args.n) + random.shuffle(samples) + samples = samples[0:args.n] + + if args.g == 'base': + plot_base_cnn_samples(outdir, samples, interactive=args.i or False) + else: + plot_icenet_samples(outdir, samples, interactive=args.i or False) + + print("Done.") diff --git a/run_model.py b/run_model.py new file mode 100755 index 0000000..2f05af4 --- /dev/null +++ b/run_model.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +"""Run a model.""" + +import os +import argparse +from icenet import MODELS + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '-d', + required=True, + help="Directory containing {train,test}.json." + ) + parser.add_argument( + 'target_fn', + help=("Target . in the icenet package. " + "The function must take the data directory as argument.") + ) + args = parser.parse_args() + if not os.path.isdir(args.d): + raise Exception("Output directory does not exist") + + model, fn = args.target_fn.split('.') + print("Running model %s ..." 
% model) +    fn = getattr(MODELS[model], fn) +    fn(args.d) +    print("Done.") diff --git a/run_model_docker.sh b/run_model_docker.sh new file mode 100755 index 0000000..562d823 --- /dev/null +++ b/run_model_docker.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -e + +docker build -t iceberg:$(cat VERSION) . +docker run --name iceberg --rm -v "$(pwd)/data:/data" iceberg:$(cat VERSION) "$@" diff --git a/show_predicted.sh b/show_predicted.sh new file mode 100755 index 0000000..49d8582 --- /dev/null +++ b/show_predicted.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +# Args: +#   path to predictions (CSV) +#   path to images + +tail -n+2 "$1" | shuf | while read -r i; do +    ID=$(echo "$i" | cut -d ',' -f1) +    PRED=$(echo "$i" | cut -d ',' -f2) +    echo "ID $ID: is_iceberg = $PRED" && gwenview "$2/$ID.png" 2>/dev/null +done
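+
+# Example invocation (illustrative paths, assuming per-sample PNGs such as those
+# written by plot.py live in the image directory):
+#   ./show_predicted.sh data/predictions/icenet-dev.csv img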