Commit dad0031e authored by Vít Starý Novotný
Browse files

Update dockerfiles

parent 7e943d0e
Loading
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ all: build

build:
	echo -e 'FROM scratch\nLABEL empty=""' | $(DOCKER_BUILD) -t ahisto/empty -
	$(DOCKER_BUILD) -t ahisto/ocr-eval 'https://gitlab.fi.muni.cz/xnovot32/ahisto-ocr-eval.git#5dfc0f9b'
	$(DOCKER_BUILD) -t ahisto/ocr-eval 'https://gitlab.fi.muni.cz/xnovot32/ahisto-ocr-eval.git#manylang'
	$(DOCKER_BUILD) -t ahisto/ocr-fileformat -f dockerfiles/ocr-fileformat.Dockerfile dockerfiles
	$(DOCKER_BUILD) -t ahisto/tesseract      -f dockerfiles/tesseract.Dockerfile      dockerfiles
	$(DOCKER_BUILD) -t ahisto/waifu2x        -f dockerfiles/waifu2x.Dockerfile        dockerfiles
+1 −1
Original line number Diff line number Diff line
@@ -91,7 +91,7 @@ def run_ocr(input_dir: Path, input_images: List[Path], output_dir: Path,
        run_tesseract(client, postprocessing_volume, tesseract_volume)

        if pero_ocr:
            run_pero_ocr(client, postprocessing_volume, pero_ocr_volume, gpus)
            run_pero_ocr(client, input_volume, pero_ocr_volume, gpus)
            combine_results(client, tesseract_volume, pero_ocr_volume, output_volume)
        else:
            LOGGER.info('Skipping PERO OCR')
+2 −2
Original line number Diff line number Diff line
FROM ubma/ocr-fileformat

RUN set -e -o xtrace \
  ; apk add parallel \
  ; parallel --will-cite \
  ; apk add --allow-untrusted parallel \
  ; parallel --will-cite
+4 −4
Original line number Diff line number Diff line
@@ -7,16 +7,16 @@ RUN set -e -o xtrace \
                                                parallel \
                                                wget \
  ; git clone https://github.com/cneud/page-to-text.git \
  ; cd pero-to-text \
  ; cd page-to-text \
  ; git checkout c4fcc9e \
  ; cd .. \
  ; git clone https://github.com/witiko/pero-ocr.git \
  ; git clone https://github.com/DCGM/pero-ocr.git \
  ; cd pero-ocr \
  ; git checkout feature/add-ahisto-ocr-script \
  ; git checkout 8908759 \
  ; wget -O- https://www.fit.vut.cz/~ihradis/pero/pero_eu_cz_print_newspapers_2020-10-09.tar.gz | tar xzv --strip-components=1 \
  ; pip install -U wheel pip \
  ; pip install protobuf==3.20.1 shapely==1.7.1 scikit-learn==1.0.2 \
  ; pip install . \
  ; pip install torchvision

ENTRYPOINT $PWD/ahisto-ocr-script.sh
WORKDIR pero-ocr
+1 −1
Original line number Diff line number Diff line
FROM ubuntu:21.04
FROM ubuntu:latest

ENV DEBIAN_FRONTEND=noninteractive \
    TERM=xterm