Compare commits
2 Commits
4db0055103
...
4c6b1a936d
| Author | SHA1 | Date | |
|---|---|---|---|
| 4c6b1a936d | |||
| f0c665993a |
3
README.md
Normal file
3
README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# OCR test grounds
|
||||
|
||||
Experimenting with open-source OCR software.
|
||||
16
tesseract/Dockerfile
Normal file
16
tesseract/Dockerfile
Normal file
@ -0,0 +1,16 @@
|
||||
# Image bundling Tesseract OCR (English and Norwegian language packages) and
# ImageMagick, plus the ocr.sh helper script installed at /bin/ocr.sh.
FROM ubuntu:18.04

# Use the C.UTF-8 locale for every process in the image.
ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# Register the alex-p/tesseract-ocr PPA as an additional package source.
# DEBIAN_FRONTEND is set inline (not via ENV) so package configuration can
# never block the build on a prompt, without leaking into the runtime env.
# Recommended packages are intentionally left enabled: add-apt-repository
# relies on gpg tooling to import the PPA key -- TODO confirm before adding
# --no-install-recommends here.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:alex-p/tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Install the OCR tooling. The apt package lists are removed in the same
# layer that downloads them so they do not end up in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        imagemagick \
        tesseract-ocr \
        tesseract-ocr-eng \
        tesseract-ocr-nor \
    && rm -rf /var/lib/apt/lists/*

# Helper script that pre-processes an image and runs tesseract on it.
COPY ocr.sh /bin/ocr.sh
RUN chmod +x /bin/ocr.sh

# The container runs tesseract directly; arguments passed to `docker run`
# are forwarded to it. /bin/ocr.sh is available but is not the entrypoint.
ENTRYPOINT ["tesseract"]
|
||||
5
tesseract/ocr.sh
Normal file
5
tesseract/ocr.sh
Normal file
@ -0,0 +1,5 @@
|
||||
#!/bin/sh
# OCR a single image with Tesseract using the Norwegian ("nor") language data
# and print the recognized text to stdout.
#
# Usage: ocr.sh IMAGE
#
# Intermediate files use fixed paths under /tmp (/tmp/image.jpg, /tmp/out*).

# Abort on the first failing command so a failed conversion is never followed
# by OCR of a stale /tmp/image.jpg or by printing old /tmp/out* files.
set -e

if [ "$#" -lt 1 ]; then
    echo "usage: $0 IMAGE" >&2
    exit 2
fi

# Pre-process the input: grayscale, scale to 5000 px wide, light sharpening.
# "$1" is quoted so paths containing spaces or glob characters stay intact.
convert -colorspace gray -fill white -resize 5000x -sharpen 0x1 "$1" /tmp/image.jpg

# /tmp/out is the output base name; tesseract adds the file extension itself.
tesseract -l nor /tmp/image.jpg /tmp/out

# Print whatever output file(s) were produced under that base name.
cat /tmp/out*
|
||||
Loading…
x
Reference in New Issue
Block a user