Compare commits
No commits in common. "a71f1f1d50af2a7377bed0a427a2847e98d79737" and "58cfd41f438a593f7f3ef4a85e5e76b1d0cb6e9b" have entirely different histories.
a71f1f1d50
...
58cfd41f43
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,5 +1,2 @@
|
|||||||
__pycache__
|
__pycache__
|
||||||
venv
|
venv
|
||||||
yolov3
|
|
||||||
data/photos
|
|
||||||
.*
|
|
||||||
|
|||||||
13
Makefile
13
Makefile
@ -1,13 +0,0 @@
|
|||||||
WEIGHTS = yolov3-tiny
|
|
||||||
#WEIGHTS = yolov3
|
|
||||||
|
|
||||||
photocat :
|
|
||||||
echo "Download YOLOv3 network $(WEIGHTS)"
|
|
||||||
wget "https://pjreddie.com/media/files/$(WEIGHTS).weights" --header "Referer: pjreddie.com" -P yolov3
|
|
||||||
wget "https://raw.githubusercontent.com/eriklindernoren/PyTorch-YOLOv3/master/config/$(WEIGHTS).cfg" -P yolov3
|
|
||||||
echo "Install with pip etc TODO"
|
|
||||||
|
|
||||||
clean :
|
|
||||||
rm -f yolov3/$(WEIGHTS).{weights,cfg}
|
|
||||||
|
|
||||||
.PHONY : photocat clean
|
|
||||||
133
photocat.ipynb
133
photocat.ipynb
@ -1,133 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Automatic photo categorization\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"Goals:\n",
|
|
||||||
" - Categorize photos into semantically similar groups.\n",
|
|
||||||
" - Mark similar photos for removal.\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"## Table of contents\n",
|
|
||||||
" 1. [Features](#features)\n",
|
|
||||||
" 2. [Clustering](#clustering)\n",
|
|
||||||
" 3. [Deduplication](#deduplication)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%load_ext autoreload\n",
|
|
||||||
"%autoreload 2\n",
|
|
||||||
"\n",
|
|
||||||
"import matplotlib.pyplot as plt\n",
|
|
||||||
"%matplotlib inline\n",
|
|
||||||
"\n",
|
|
||||||
"from tqdm import tqdm\n",
|
|
||||||
"#from tqdm.notebook import tqdm\n",
|
|
||||||
"\n",
|
|
||||||
"from toolz import compose\n",
|
|
||||||
"from toolz.curried import map, filter"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from photocat import fs, photo, group\n",
|
|
||||||
"\n",
|
|
||||||
"INPUT_DIR = 'data/photos'"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"<a name=\"features\"></a>\n",
|
|
||||||
"## Features\n",
|
|
||||||
"\n",
|
|
||||||
"Extract features from EXIF data and YOLOv3 output."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def show_photos(photos, n_row, n_col, size=4):\n",
|
|
||||||
" _, axs = plt.subplots(n_row, n_col, figsize=(n_col*size, n_row*size))\n",
|
|
||||||
" axs = axs.flatten()\n",
|
|
||||||
" for p, ax in zip(photos, axs):\n",
|
|
||||||
" ax.imshow(p.thumbnail)\n",
|
|
||||||
" plt.show()\n",
|
|
||||||
"\n",
|
|
||||||
"photos = compose(\n",
|
|
||||||
" list,\n",
|
|
||||||
" tqdm,\n",
|
|
||||||
" map(lambda f: photo.Photo(f)),\n",
|
|
||||||
" fs.list_images\n",
|
|
||||||
")(INPUT_DIR)\n",
|
|
||||||
"\n",
|
|
||||||
"show_photos(photos[0:24], 6, 4)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"<a name=\"clustering\"></a>\n",
|
|
||||||
"## Clustering\n",
|
|
||||||
"\n",
|
|
||||||
"Normalize features and cluster with DBSCAN."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"<a name=\"deduplication\"></a>\n",
|
|
||||||
"## Deduplication\n",
|
|
||||||
"\n",
|
|
||||||
"Use eucledian distance between outputs of topmost YOLOv3 layers as a metric for photo similarity."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.9.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@ -1,5 +1,4 @@
|
|||||||
import os
|
import os
|
||||||
import datetime
|
|
||||||
from toolz import compose
|
from toolz import compose
|
||||||
from toolz.curried import filter, map
|
from toolz.curried import filter, map
|
||||||
|
|
||||||
@ -21,9 +20,3 @@ def list_images(folder):
|
|||||||
map(lambda f: os.path.join(folder, f)),
|
map(lambda f: os.path.join(folder, f)),
|
||||||
filter(lambda f: os.path.splitext(f)[-1].lower() in IMG_EXT)
|
filter(lambda f: os.path.splitext(f)[-1].lower() in IMG_EXT)
|
||||||
)(files)
|
)(files)
|
||||||
|
|
||||||
|
|
||||||
def last_modified(filename):
|
|
||||||
epoch = os.path.getmtime(filename)
|
|
||||||
return datetime.datetime.fromtimestamp(epoch)
|
|
||||||
|
|
||||||
|
|||||||
@ -1,33 +1,30 @@
|
|||||||
import datetime
|
from collections import namedtuple
|
||||||
from dataclasses import dataclass
|
|
||||||
from itertools import groupby
|
from itertools import groupby
|
||||||
from toolz import curry, compose
|
from toolz import curry, compose
|
||||||
from toolz.curried import map, filter
|
from toolz.curried import map, filter
|
||||||
|
|
||||||
from photocat.photo import Photo
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(init = False)
|
PhotoGroup = namedtuple(
|
||||||
class PhotoGroup:
|
'PhotoGroup',
|
||||||
name: str
|
['name', 'datetimes', 'photos']
|
||||||
min_datetime: datetime.datetime
|
)
|
||||||
max_datetime: datetime.datetime
|
|
||||||
photos: list[Photo]
|
|
||||||
|
|
||||||
def __init__(self, photos: list[Photo]):
|
|
||||||
self.min_datetime = min(photos, key=lambda p: p.datetime).datetime
|
|
||||||
self.max_datetime = max(photos, key=lambda p: p.datetime).datetime
|
|
||||||
self.name = str(self.min_datetime) + '-' + str(self.max_datetime) # TODO
|
|
||||||
self.photos = photos
|
|
||||||
|
|
||||||
|
|
||||||
@curry
|
@curry
|
||||||
def _group(key, photos):
|
def _group(key, photos):
|
||||||
|
def create_group(k, photos):
|
||||||
|
min_dt = min(photos, key=lambda p: p.datetime).datetime
|
||||||
|
max_dt = max(photos, key=lambda p: p.datetime).datetime
|
||||||
|
name = str(min_dt) + '-' + str(max_dt) # TODO
|
||||||
|
return PhotoGroup(name, (min_dt, max_dt), photos)
|
||||||
|
|
||||||
return [
|
return [
|
||||||
PhotoGroup(list(v))
|
create_group(k, list(v))
|
||||||
for _, v in groupby(sorted(photos, key=key), key=key)
|
for k, v in groupby(sorted(photos, key=key), key=key)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
photos_by_month = _group(lambda p: (p.datetime.year, p.datetime.month))
|
photos_by_month = _group(lambda p: p.datetime.month)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
34
photocat/image.py
Normal file
34
photocat/image.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import io
|
||||||
|
from PIL import Image, ExifTags
|
||||||
|
|
||||||
|
|
||||||
|
def read(filename, resize=None):
|
||||||
|
"""Read and optionally resize an image."""
|
||||||
|
img = Image.open(filename)
|
||||||
|
cur_width, cur_height = img.size
|
||||||
|
if resize:
|
||||||
|
new_width, new_height = resize
|
||||||
|
scale = min(new_height/cur_height, new_width/cur_width)
|
||||||
|
img = img.resize((int(cur_width*scale), int(cur_height*scale)), Image.ANTIALIAS)
|
||||||
|
return img
|
||||||
|
|
||||||
|
|
||||||
|
def read_exif(filename):
|
||||||
|
"""Read EXIF data."""
|
||||||
|
img = Image.open(filename)
|
||||||
|
exif = img.getexif()
|
||||||
|
if exif is None:
|
||||||
|
raise Exception("No EXIF data for image %s" % filename)
|
||||||
|
return {
|
||||||
|
ExifTags.TAGS[k]: v
|
||||||
|
for k, v in exif.items()
|
||||||
|
if k in ExifTags.TAGS
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def to_bytes(img):
|
||||||
|
"""Convert image to PNG format and return as byte-string object."""
|
||||||
|
bio = io.BytesIO()
|
||||||
|
img.save(bio, format="PNG")
|
||||||
|
return bio.getvalue()
|
||||||
|
|
||||||
29
photocat/main.py
Executable file → Normal file
29
photocat/main.py
Executable file → Normal file
@ -1,22 +1,24 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
#import PySimpleGUI as sg
|
#import PySimpleGUI as sg
|
||||||
import PySimpleGUIQt as sg
|
import PySimpleGUIQt as sg
|
||||||
import os
|
import os
|
||||||
from toolz import compose
|
from toolz import compose
|
||||||
from toolz.curried import map
|
from toolz.curried import map
|
||||||
|
|
||||||
from photocat import fs, photo, group
|
import fs
|
||||||
|
import image
|
||||||
|
import photo
|
||||||
|
import group
|
||||||
|
|
||||||
|
|
||||||
MAX_ROWS = 100
|
MAX_ROWS = 100
|
||||||
MAX_COLS = 4
|
MAX_COLS = 5
|
||||||
|
IMG_SIZE = (100, 100)
|
||||||
|
|
||||||
NA_FILENAME = os.path.join(
|
NA_FILE = os.path.join(
|
||||||
os.path.dirname(__file__),
|
os.path.dirname(__file__),
|
||||||
'na.jpg'
|
'na.jpg'
|
||||||
)
|
)
|
||||||
NA_PHOTO = photo.Photo(NA_FILENAME)
|
NA_IMG = image.to_bytes(image.read(NA_FILE, resize=IMG_SIZE))
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -25,12 +27,12 @@ def main():
|
|||||||
[sg.Listbox(values=[], enable_events=True, size=(40,20),key='GROUP LIST')]
|
[sg.Listbox(values=[], enable_events=True, size=(40,20),key='GROUP LIST')]
|
||||||
]
|
]
|
||||||
image_view = [
|
image_view = [
|
||||||
[sg.Image(key='PHOTO %d' % (i*MAX_COLS+j), data=NA_PHOTO.to_bytes(), visible=False, enable_events=True) for j in range(MAX_COLS)]
|
[sg.Image(key='PHOTO %d' % (i*MAX_COLS+j), data=NA_IMG, visible=False, enable_events=True) for j in range(MAX_COLS)]
|
||||||
for i in range(MAX_ROWS)
|
for i in range(MAX_ROWS)
|
||||||
]
|
]
|
||||||
group_view = [
|
group_view = [
|
||||||
[sg.Text('Group: ')],
|
[sg.Text('Group: ')],
|
||||||
[sg.Column(image_view, scrollable=True, size=(900, 700), element_justification='l')]
|
[sg.Column(image_view, scrollable=True, size=(650, 700), element_justification='l')]
|
||||||
]
|
]
|
||||||
|
|
||||||
layout = [[
|
layout = [[
|
||||||
@ -50,7 +52,7 @@ def main():
|
|||||||
# Process input photos into groups
|
# Process input photos into groups
|
||||||
groups = compose(
|
groups = compose(
|
||||||
group.photos_by_month,
|
group.photos_by_month,
|
||||||
map(lambda f: photo.Photo(f)),
|
map(photo.read_photo),
|
||||||
fs.list_images
|
fs.list_images
|
||||||
)(values['FOLDER'])
|
)(values['FOLDER'])
|
||||||
window['GROUP LIST'].update(values=[g.name for g in groups])
|
window['GROUP LIST'].update(values=[g.name for g in groups])
|
||||||
@ -62,13 +64,16 @@ def main():
|
|||||||
# Assert number of photos
|
# Assert number of photos
|
||||||
n_photos = len(current_group.photos)
|
n_photos = len(current_group.photos)
|
||||||
assert n_photos <= MAX_ROWS*MAX_COLS
|
assert n_photos <= MAX_ROWS*MAX_COLS
|
||||||
# Update image view
|
# Reset image view
|
||||||
for idx in range(MAX_ROWS*MAX_COLS):
|
for idx in range(MAX_ROWS*MAX_COLS):
|
||||||
if idx < n_photos:
|
if idx < n_photos:
|
||||||
p = current_group.photos[idx]
|
window['PHOTO %d' % idx].update(data=NA_IMG, visible=True)
|
||||||
window['PHOTO %d' % idx].update(data=p.to_bytes(), visible=True)
|
|
||||||
else:
|
else:
|
||||||
window['PHOTO %d' % idx].update(visible=False)
|
window['PHOTO %d' % idx].update(visible=False)
|
||||||
|
# Load and display images
|
||||||
|
for idx, p in enumerate(current_group.photos):
|
||||||
|
img_data = image.to_bytes(image.read(p.filename, resize=IMG_SIZE))
|
||||||
|
window['PHOTO %d' % idx].update(data=img_data)
|
||||||
elif event.startswith('PHOTO'):
|
elif event.startswith('PHOTO'):
|
||||||
idx = int(event.split(' ')[-1])
|
idx = int(event.split(' ')[-1])
|
||||||
print("Selected photo %d" % idx)
|
print("Selected photo %d" % idx)
|
||||||
|
|||||||
@ -1,62 +1,32 @@
|
|||||||
|
import os
|
||||||
import datetime
|
import datetime
|
||||||
import io
|
from collections import namedtuple
|
||||||
from dataclasses import dataclass
|
from dateutil import parser
|
||||||
from PIL import Image, ExifTags
|
|
||||||
|
|
||||||
from photocat import fs
|
from image import read_exif
|
||||||
|
|
||||||
|
|
||||||
IMG_SIZE = (200, 200)
|
Photo = namedtuple(
|
||||||
|
'Photo',
|
||||||
EXIF_DATETIME_KEY = 'DateTime'
|
['filename', 'datetime', 'exif', 'features', 'selected']
|
||||||
EXIF_DATETIME_FORMAT = '%Y:%m:%d %H:%M:%S'
|
)
|
||||||
|
|
||||||
|
|
||||||
def _exif_dt(exif):
|
def _exif_dt(exif):
|
||||||
try:
|
try:
|
||||||
return datetime.datetime.strptime(
|
return parser.parse(exif['DateTimeOriginal'])
|
||||||
exif[EXIF_DATETIME_KEY],
|
|
||||||
EXIF_DATETIME_FORMAT
|
|
||||||
)
|
|
||||||
except Exception:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _last_modified_dt(filename):
|
||||||
|
epoch = os.path.getmtime(filename)
|
||||||
|
return datetime.datetime.fromtimestamp(epoch)
|
||||||
|
|
||||||
@dataclass(init = False)
|
|
||||||
class Photo:
|
|
||||||
filename: str
|
|
||||||
exif: dict
|
|
||||||
datetime: datetime.datetime
|
|
||||||
thumbnail: Image
|
|
||||||
features: list
|
|
||||||
selected: bool = True
|
|
||||||
|
|
||||||
def __init__(self, filename: str):
|
def read_photo(filename):
|
||||||
self.filename = filename
|
exif = read_exif(filename)
|
||||||
img = Image.open(filename)
|
print(filename, exif)
|
||||||
|
dt = _exif_dt(exif) or _last_modified_dt(filename)
|
||||||
exif = img.getexif()
|
features = [] # TODO
|
||||||
if exif is None:
|
return Photo(filename, dt, exif, features, True)
|
||||||
raise Exception("No EXIF data for image %s" % filename)
|
|
||||||
self.exif = {
|
|
||||||
ExifTags.TAGS[k]: v
|
|
||||||
for k, v in exif.items()
|
|
||||||
if k in ExifTags.TAGS
|
|
||||||
}
|
|
||||||
|
|
||||||
self.datetime = _exif_dt(self.exif) or fs.last_modified(filename)
|
|
||||||
|
|
||||||
cur_width, cur_height = img.size
|
|
||||||
new_width, new_height = IMG_SIZE
|
|
||||||
scale = min(new_height/cur_height, new_width/cur_width)
|
|
||||||
self.thumbnail = img.resize((int(cur_width*scale), int(cur_height*scale)), Image.ANTIALIAS)
|
|
||||||
|
|
||||||
self.features = [] # TODO
|
|
||||||
print("Loaded", filename, "at", self.datetime, "with exif", self.exif)
|
|
||||||
|
|
||||||
def to_bytes(self) -> bytes:
|
|
||||||
"""Convert image to PNG format and return as byte-string object."""
|
|
||||||
bio = io.BytesIO()
|
|
||||||
self.thumbnail.save(bio, format="PNG")
|
|
||||||
return bio.getvalue()
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user