Loading Makefile +1 −1 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ all: build build: echo -e 'FROM scratch\nLABEL empty=""' | $(DOCKER_BUILD) -t ahisto/empty - $(DOCKER_BUILD) -t ahisto/ocr-eval 'https://gitlab.fi.muni.cz/xnovot32/ahisto-ocr-eval.git#5dfc0f9b' $(DOCKER_BUILD) -t ahisto/ocr-eval 'https://gitlab.fi.muni.cz/xnovot32/ahisto-ocr-eval.git#manylang' $(DOCKER_BUILD) -t ahisto/ocr-fileformat -f dockerfiles/ocr-fileformat.Dockerfile dockerfiles $(DOCKER_BUILD) -t ahisto/tesseract -f dockerfiles/tesseract.Dockerfile dockerfiles $(DOCKER_BUILD) -t ahisto/waifu2x -f dockerfiles/waifu2x.Dockerfile dockerfiles Loading ahisto_ocr/cli.py +1 −1 Original line number Diff line number Diff line Loading @@ -91,7 +91,7 @@ def run_ocr(input_dir: Path, input_images: List[Path], output_dir: Path, run_tesseract(client, postprocessing_volume, tesseract_volume) if pero_ocr: run_pero_ocr(client, postprocessing_volume, pero_ocr_volume, gpus) run_pero_ocr(client, input_volume, pero_ocr_volume, gpus) combine_results(client, tesseract_volume, pero_ocr_volume, output_volume) else: LOGGER.info('Skipping PERO OCR') Loading dockerfiles/ocr-fileformat.Dockerfile +2 −2 Original line number Diff line number Diff line FROM ubma/ocr-fileformat RUN set -e -o xtrace \ ; apk add parallel \ ; parallel --will-cite \ ; apk add --allow-untrusted parallel \ ; parallel --will-cite dockerfiles/pero-ocr.Dockerfile +4 −4 Original line number Diff line number Diff line Loading @@ -7,16 +7,16 @@ RUN set -e -o xtrace \ parallel \ wget \ ; git clone https://github.com/cneud/page-to-text.git \ ; cd pero-to-text \ ; cd page-to-text \ ; git checkout c4fcc9e \ ; cd .. \ ; git clone https://github.com/witiko/pero-ocr.git \ ; git clone https://github.com/DCGM/pero-ocr.git \ ; cd pero-ocr \ ; git checkout feature/add-ahisto-ocr-script \ ; git checkout 8908759 \ ; wget -O- https://www.fit.vut.cz/~ihradis/pero/pero_eu_cz_print_newspapers_2020-10-09.tar.gz | tar xzv --strip-components=1 \ ; pip install -U wheel pip \ ; pip install protobuf==3.20.1 shapely==1.7.1 scikit-learn==1.0.2 \ ; pip install . \ ; pip install torchvision ENTRYPOINT $PWD/ahisto-ocr-script.sh WORKDIR pero-ocr dockerfiles/tesseract.Dockerfile +1 −1 Original line number Diff line number Diff line FROM ubuntu:21.04 FROM ubuntu:latest ENV DEBIAN_FRONTEND=noninteractive \ TERM=xterm Loading Loading
Makefile +1 −1 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ all: build build: echo -e 'FROM scratch\nLABEL empty=""' | $(DOCKER_BUILD) -t ahisto/empty - $(DOCKER_BUILD) -t ahisto/ocr-eval 'https://gitlab.fi.muni.cz/xnovot32/ahisto-ocr-eval.git#5dfc0f9b' $(DOCKER_BUILD) -t ahisto/ocr-eval 'https://gitlab.fi.muni.cz/xnovot32/ahisto-ocr-eval.git#manylang' $(DOCKER_BUILD) -t ahisto/ocr-fileformat -f dockerfiles/ocr-fileformat.Dockerfile dockerfiles $(DOCKER_BUILD) -t ahisto/tesseract -f dockerfiles/tesseract.Dockerfile dockerfiles $(DOCKER_BUILD) -t ahisto/waifu2x -f dockerfiles/waifu2x.Dockerfile dockerfiles Loading
ahisto_ocr/cli.py +1 −1 Original line number Diff line number Diff line Loading @@ -91,7 +91,7 @@ def run_ocr(input_dir: Path, input_images: List[Path], output_dir: Path, run_tesseract(client, postprocessing_volume, tesseract_volume) if pero_ocr: run_pero_ocr(client, postprocessing_volume, pero_ocr_volume, gpus) run_pero_ocr(client, input_volume, pero_ocr_volume, gpus) combine_results(client, tesseract_volume, pero_ocr_volume, output_volume) else: LOGGER.info('Skipping PERO OCR') Loading
dockerfiles/ocr-fileformat.Dockerfile +2 −2 Original line number Diff line number Diff line FROM ubma/ocr-fileformat RUN set -e -o xtrace \ ; apk add parallel \ ; parallel --will-cite \ ; apk add --allow-untrusted parallel \ ; parallel --will-cite
dockerfiles/pero-ocr.Dockerfile +4 −4 Original line number Diff line number Diff line Loading @@ -7,16 +7,16 @@ RUN set -e -o xtrace \ parallel \ wget \ ; git clone https://github.com/cneud/page-to-text.git \ ; cd pero-to-text \ ; cd page-to-text \ ; git checkout c4fcc9e \ ; cd .. \ ; git clone https://github.com/witiko/pero-ocr.git \ ; git clone https://github.com/DCGM/pero-ocr.git \ ; cd pero-ocr \ ; git checkout feature/add-ahisto-ocr-script \ ; git checkout 8908759 \ ; wget -O- https://www.fit.vut.cz/~ihradis/pero/pero_eu_cz_print_newspapers_2020-10-09.tar.gz | tar xzv --strip-components=1 \ ; pip install -U wheel pip \ ; pip install protobuf==3.20.1 shapely==1.7.1 scikit-learn==1.0.2 \ ; pip install . \ ; pip install torchvision ENTRYPOINT $PWD/ahisto-ocr-script.sh WORKDIR pero-ocr
dockerfiles/tesseract.Dockerfile +1 −1 Original line number Diff line number Diff line FROM ubuntu:21.04 FROM ubuntu:latest ENV DEBIAN_FRONTEND=noninteractive \ TERM=xterm Loading