Add notebook
This commit is contained in:
parent
68d679d6ce
commit
168d0c689f
133
photocat.ipynb
Normal file
133
photocat.ipynb
Normal file
@ -0,0 +1,133 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Automatic photo categorization\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Goals:\n",
|
||||
" - Categorize photos into semantically similar groups.\n",
|
||||
" - Mark near-duplicate photos for removal.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Table of contents\n",
|
||||
" 1. [Features](#features)\n",
|
||||
" 2. [Clustering](#clustering)\n",
|
||||
" 3. [Deduplication](#deduplication)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"#from tqdm.notebook import tqdm\n",
|
||||
"\n",
|
||||
"from toolz import compose\n",
|
||||
"from toolz.curried import map, filter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from photocat import fs, photo, group\n",
|
||||
"\n",
|
||||
"INPUT_DIR = 'data/photos'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"features\"></a>\n",
|
||||
"## Features\n",
|
||||
"\n",
|
||||
"Extract features from EXIF data and YOLOv3 output."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def show_photos(photos, n_row, n_col, size=4):\n",
|
||||
" _, axs = plt.subplots(n_row, n_col, figsize=(n_col*size, n_row*size))\n",
|
||||
" axs = axs.flatten()\n",
|
||||
" for p, ax in zip(photos, axs):\n",
|
||||
" ax.imshow(p.thumbnail)\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
"photos = compose(\n",
|
||||
" list,\n",
|
||||
" tqdm,\n",
|
||||
" map(lambda f: photo.Photo(f)),\n",
|
||||
" fs.list_images\n",
|
||||
")(INPUT_DIR)\n",
|
||||
"\n",
|
||||
"show_photos(photos[0:24], 6, 4)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"clustering\"></a>\n",
|
||||
"## Clustering\n",
|
||||
"\n",
|
||||
"Normalize features and cluster with DBSCAN."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"deduplication\"></a>\n",
|
||||
"## Deduplication\n",
|
||||
"\n",
|
||||
"Use Euclidean distance between outputs of topmost YOLOv3 layers as a metric for photo similarity."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user