diff --git a/photocat.ipynb b/photocat.ipynb
new file mode 100644
index 0000000..c37cdf8
--- /dev/null
+++ b/photocat.ipynb
@@ -0,0 +1,133 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Automatic photo categorization\n",
+ "\n",
+ "\n",
+ "Goals:\n",
+ " - Categorize photos into semantically similar groups.\n",
+ " - Mark similar photos for removal.\n",
+ "\n",
+ "\n",
+ "## Table of contents\n",
+ " 1. [Features](#features)\n",
+ " 2. [Clustering](#clustering)\n",
+ " 3. [Deduplication](#deduplication)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n",
+ "\n",
+ "from tqdm import tqdm\n",
+ "#from tqdm.notebook import tqdm\n",
+ "\n",
+ "from toolz import compose\n",
+ "from toolz.curried import map, filter"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from photocat import fs, photo, group\n",
+ "\n",
+ "INPUT_DIR = 'data/photos'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "## Features\n",
+ "\n",
+ "Extract features from EXIF data and YOLOv3 output."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def show_photos(photos, n_row, n_col, size=4):\n",
+ "    \"\"\"Show each photo's `thumbnail` on an n_row x n_col grid; `size` scales the figure per cell.\"\"\"\n",
+ "    _, axs = plt.subplots(n_row, n_col, figsize=(n_col*size, n_row*size), squeeze=False)  # always 2-D, even 1x1\n",
+ "    for p, ax in zip(photos, axs.flatten()):\n",
+ "        ax.imshow(p.thumbnail)\n",
+ "    plt.show()\n",
+ "\n",
+ "# Build a photo.Photo for every entry fs.list_images returns for INPUT_DIR;\n",
+ "# tqdm wraps the lazy map so progress is shown while list() consumes it.\n",
+ "photos = list(\n",
+ "    tqdm(map(photo.Photo, fs.list_images(INPUT_DIR)))\n",
+ ")\n",
+ "\n",
+ "# Preview the first 24 photos on a 6-row by 4-column grid.\n",
+ "show_photos(photos[:24], 6, 4)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "## Clustering\n",
+ "\n",
+ "Normalize features and cluster with DBSCAN."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "## Deduplication\n",
+ "\n",
+ "Use Euclidean distance between outputs of topmost YOLOv3 layers as a metric for photo similarity."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}