diff --git a/bad_ocr.py b/easyocr/bad_ocr.py
similarity index 100%
rename from bad_ocr.py
rename to easyocr/bad_ocr.py
diff --git a/requirements.pip b/easyocr/requirements.pip
similarity index 100%
rename from requirements.pip
rename to easyocr/requirements.pip
diff --git a/tesseract/Dockerfile b/tesseract/Dockerfile
new file mode 100644
--- /dev/null
+++ b/tesseract/Dockerfile
@@ -0,0 +1,27 @@
+FROM ubuntu:18.04
+
+# Select the C.UTF-8 locale for every process started in the image.
+ENV LANG=C.UTF-8 \
+    LC_ALL=C.UTF-8
+
+# Add the ppa:alex-p/tesseract-ocr repository, then install ImageMagick and
+# Tesseract with the eng and nor language packs. Everything runs in a single
+# layer so the apt lists are removed in the same layer that downloaded them.
+RUN apt-get update \
+    && apt-get install -y software-properties-common \
+    && add-apt-repository -y ppa:alex-p/tesseract-ocr \
+    && apt-get update \
+    && apt-get install -y \
+        imagemagick \
+        tesseract-ocr \
+        tesseract-ocr-eng \
+        tesseract-ocr-nor \
+    && rm -rf /var/lib/apt/lists/*
+
+# Helper script that preprocesses an image with ImageMagick before OCR.
+COPY ocr.sh /bin/ocr.sh
+RUN chmod +x /bin/ocr.sh
+
+# The image runs tesseract directly by default; ocr.sh has to be requested
+# explicitly (for example with --entrypoint /bin/ocr.sh).
+ENTRYPOINT ["tesseract"]
diff --git a/tesseract/ocr.sh b/tesseract/ocr.sh
new file mode 100644
--- /dev/null
+++ b/tesseract/ocr.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+# Usage: ocr.sh IMAGE
+# Preprocess IMAGE with ImageMagick, run tesseract on the result with the
+# "nor" language, and print the recognized text to stdout.
+
+# Stop at the first failing command or unset variable, so a failed conversion
+# never lets tesseract run on a stale /tmp/image.jpg.
+set -eu
+
+if [ "$#" -lt 1 ]; then
+    echo "usage: $0 IMAGE" >&2
+    exit 2
+fi
+
+# Grayscale, resize to 5000 px wide and sharpen before OCR. "$1" is quoted so
+# paths containing spaces or glob characters reach convert intact.
+convert -colorspace gray -fill white -resize 5000x -sharpen 0x1 "$1" /tmp/image.jpg
+# /tmp/out is the output base name; the glob below picks up the file tesseract writes.
+tesseract -l nor /tmp/image.jpg /tmp/out
+cat /tmp/out*