Compare commits

...

2 Commits

Author SHA1 Message Date
4c6b1a936d Add README 2021-08-18 22:28:04 +02:00
f0c665993a Refactor and add tesseract 2021-08-18 22:26:45 +02:00
5 changed files with 24 additions and 0 deletions

3
README.md Normal file
View File

@ -0,0 +1,3 @@
# OCR test grounds
Experimenting with open-source OCR software.

16
tesseract/Dockerfile Normal file
View File

@ -0,0 +1,16 @@
# Image bundling Tesseract OCR (English and Norwegian language data) together
# with ImageMagick and the ocr.sh helper script.
FROM ubuntu:18.04

# Use a UTF-8 locale for every process in the image.
ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# Register the alex-p/tesseract-ocr PPA before installing the Tesseract
# packages. DEBIAN_FRONTEND is set inline (not via ENV) so package
# configuration cannot prompt during the build and the setting does not leak
# into the runtime environment. The apt lists are removed in the same layer
# that created them so they do not end up in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:alex-p/tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# `apt-get update` is repeated here on purpose: the previous layer deleted its
# package lists, and the index of the newly added PPA must be fetched before
# the install can resolve the packages.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        imagemagick \
        tesseract-ocr \
        tesseract-ocr-eng \
        tesseract-ocr-nor \
    && rm -rf /var/lib/apt/lists/*

# Helper script: pre-processes an image with ImageMagick, then runs Tesseract.
COPY ocr.sh /bin/ocr.sh
RUN chmod +x /bin/ocr.sh

# The container runs tesseract directly by default; use
# `--entrypoint /bin/ocr.sh` to run the helper script instead.
ENTRYPOINT ["tesseract"]

5
tesseract/ocr.sh Normal file
View File

@ -0,0 +1,5 @@
#!/bin/sh
# Usage: ocr.sh IMAGE
#
# Pre-processes IMAGE with ImageMagick (grayscale, upscale to 5000px width,
# sharpen), runs Tesseract on the result with the Norwegian language model,
# and prints the recognized text to stdout.
#
# Intermediate files are written to fixed paths: /tmp/image.jpg and
# /tmp/out.txt.

# Abort on the first failing command or unset variable, so a failed
# conversion never leads to OCR (or output) of a stale file from an earlier run.
set -eu

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    echo "usage: $0 IMAGE" >&2
    exit 2
fi

# "$1" is quoted so paths containing spaces or glob characters stay intact.
convert -colorspace gray -fill white -resize 5000x -sharpen 0x1 "$1" /tmp/image.jpg

# Tesseract appends ".txt" to the output base, producing /tmp/out.txt.
tesseract -l nor /tmp/image.jpg /tmp/out

# Print exactly the file Tesseract wrote instead of globbing /tmp/out*,
# which could also match unrelated files.
cat /tmp/out.txt