Test easyocr
This commit is contained in:
commit
4db0055103
60
bad_ocr.py
Executable file
60
bad_ocr.py
Executable file
@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Extract text from images."""
|
||||
|
||||
import os
|
||||
import easyocr
|
||||
import argparse
|
||||
import tqdm
|
||||
from toolz import compose, curry
|
||||
from toolz.curried import map, filter
|
||||
|
||||
|
||||
# File suffixes (lower-case, leading dot included) that are treated as images.
IMG_EXT = ('.png', '.jpeg', '.jpg', '.apng', '.webp', '.avif')
|
||||
|
||||
def list_images(folder, extensions=None):
    """Return the paths of the image files located directly in *folder*.

    Only the top level of *folder* is scanned; sub-directories are not
    descended into and are never returned themselves.  An entry counts as
    an image when its suffix, compared case-insensitively, is one of
    *extensions*.

    Args:
        folder: Directory to scan.
        extensions: Iterable of lower-case suffixes including the leading
            dot, e.g. ``('.png', '.jpg')``.  Defaults to ``IMG_EXT``.

    Returns:
        A list of ``os.path.join(folder, name)`` paths, sorted by file
        name so that repeated runs process images in the same order.

    Raises:
        OSError: If *folder* cannot be listed (for instance
            ``FileNotFoundError`` or ``NotADirectoryError``).
    """
    suffixes = frozenset(IMG_EXT if extensions is None else extensions)
    # os.scandir reports a bad folder with a descriptive OSError, whereas
    # next(os.walk(folder)) would leak a bare StopIteration.
    with os.scandir(folder) as entries:
        names = sorted(
            entry.name for entry in entries if not entry.is_dir()
        )
    return [
        os.path.join(folder, name)
        for name in names
        if os.path.splitext(name)[-1].lower() in suffixes
    ]
|
||||
|
||||
@curry
def ocr(reader, img_filename):
    """Run OCR on one image and save the recognised text next to it.

    The text fragments returned by the reader are joined with single
    spaces and written, followed by a newline, to a file whose path is the
    image path with its extension replaced by ``.txt``.  An existing file
    at that path is overwritten.

    The function is curried, so ``ocr(reader)`` gives a one-argument
    callable that can be mapped over image paths.

    Args:
        reader: Object exposing ``readtext(path, detail=0)``; the script
            passes an ``easyocr.Reader``.
        img_filename: Path of the image to read.
    """
    # With detail=0 the reader is expected to return plain strings, which
    # is what the join below relies on.
    fragments = reader.readtext(img_filename, detail=0)
    stem, _ = os.path.splitext(img_filename)
    txt_filename = stem + '.txt'
    # Explicit UTF-8: OCR output routinely contains non-ASCII characters
    # and the platform default encoding may be unable to represent them.
    with open(txt_filename, 'w', encoding='utf-8') as out:
        out.write(' '.join(fragments) + '\n')
|
||||
|
||||
|
||||
def main():
    """Parse the command line and OCR every image in the given directory.

    For each image found by ``list_images`` a ``.txt`` file with the
    recognised text is written by ``ocr``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'dir',
        help="input dir"
    )
    parser.add_argument(
        '-l', '--lang',
        default='no,en',
        help="comma-separated list of language codes"
    )
    args = parser.parse_args()

    # Tolerate spaces and stray commas, e.g. "no, en" or "no,en,".
    languages = [
        code.strip() for code in args.lang.split(',') if code.strip()
    ]
    if not languages:
        parser.error("at least one language code is required")

    # List the images before loading the model so that a bad directory
    # fails immediately; materialising the list also lets tqdm show a
    # total and an ETA.
    images = list(list_images(args.dir))

    print("Loading model ...")
    ocr_reader = easyocr.Reader(languages)
    print("Run")
    for img_filename in tqdm.tqdm(images):
        ocr(ocr_reader, img_filename)
    print("Done.")


if __name__ == '__main__':
    main()
|
||||
|
||||
24
requirements.pip
Normal file
24
requirements.pip
Normal file
@ -0,0 +1,24 @@
|
||||
cycler==0.10.0
|
||||
decorator==4.4.2
|
||||
easyocr==1.3.2
|
||||
imageio==2.9.0
|
||||
kiwisolver==1.3.1
|
||||
matplotlib==3.4.2
|
||||
networkx==2.5.1
|
||||
numpy==1.21.1
|
||||
opencv-python==4.5.3.56
|
||||
Pillow==8.3.1
|
||||
pyparsing==2.4.7
|
||||
python-bidi==0.4.2
|
||||
python-dateutil==2.8.2
|
||||
PyWavelets==1.1.1
|
||||
PyYAML==5.4.1
|
||||
scikit-image==0.18.2
|
||||
scipy==1.7.0
|
||||
six==1.16.0
|
||||
tifffile==2021.7.2
|
||||
toolz==0.11.1
|
||||
torch==1.9.0
|
||||
torchvision==0.10.0
|
||||
tqdm==4.61.2
|
||||
typing-extensions==3.10.0.0
|
||||
Loading…
x
Reference in New Issue
Block a user