Loading ahisto_ocr/cli.py +1 −1 Original line number Diff line number Diff line Loading @@ -50,7 +50,7 @@ def main(input_dir: str, output_dir: str, pero_ocr: bool, super_resolution: bool LOGGER.info(f'Pulling image {name}') client.images.pull(name) check_docker_image('scratch') check_docker_image('gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty') check_docker_image('gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-eval') check_docker_image('gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-fileformat') check_docker_image('gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-tesseract') Loading ahisto_ocr/ocr.py +3 −3 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ def run_tesseract(client, preprocessing_volume, second_pass_volume) -> None: 'mode': 'rw', }, } with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: text = extract_text_file_from_container(container, '/input/list.txt') add_text_file_to_container(container, '/output/list.txt', text) Loading Loading @@ -135,7 +135,7 @@ def run_pero_ocr(client, preprocessing_volume, pero_ocr_volume, gpus: str) -> No run_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-pero', command=command, volumes=volumes, environment={'LC_ALL': 'C'}) with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: text = extract_text_file_from_container(container, '/input/list.txt') add_text_file_to_container(container, '/output/list.txt', text) Loading Loading @@ -165,7 +165,7 @@ def combine_results(client, tesseract_volume, pero_ocr_volume, output_volume): ] run_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-eval', command=command, volumes=volumes) with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: text = extract_text_file_from_container(container, '/pero-ocr/list.txt') add_text_file_to_container(container, '/output/list.txt', text) ahisto_ocr/preprocessing.py +2 −2 Original line number Diff line number Diff line Loading @@ -34,7 +34,7 @@ def apply_super_resolution(client, input_volume, postprocessing_volume, gpus: st ] run_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-eval', command=command, volumes=volumes) with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: text = extract_text_file_from_container(container, '/input/list.txt') add_text_file_to_container(container, '/output/list.txt', text) Loading @@ -55,7 +55,7 @@ def apply_super_resolution(client, input_volume, postprocessing_volume, gpus: st environment={'NVIDIA_VISIBLE_DEVICES': gpus}, command=command, volumes=volumes) with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: filenames = [ '{}_noise_scale.png'.format(Path(filename).with_suffix('')) Loading ahisto_ocr/volume.py +2 −2 Original line number Diff line number Diff line Loading @@ -14,7 +14,7 @@ LOGGER = getLogger(__name__) def copy_input_to(client, volume, input_dir: Path, input_images: List[Path]) -> None: LOGGER.info(f'Copying {len(input_images)} input images from {input_dir}/ to Docker volume {volume.short_id}') volumes = {volume.name: {'bind': '/input', 'mode': 'rw'}} with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: with TemporaryFile('w+b') as tf: with tarfile.open(fileobj=tf, mode='w') as tar: Loading @@ -32,7 +32,7 @@ def copy_input_to(client, volume, input_dir: Path, input_images: List[Path]) -> def copy_output_from(client, volume, input_images: List[Path], output_dir: Path) -> None: LOGGER.info(f'Copying {len(input_images)} OCR texts from Docker volume {volume.short_id} to {output_dir}/') volumes = {volume.name: {'bind': '/output', 'mode': 'ro'}} with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: with TemporaryFile('w+b') as tf: for chunk in container.get_archive('/output')[0]: Loading Loading
ahisto_ocr/cli.py +1 −1 Original line number Diff line number Diff line Loading @@ -50,7 +50,7 @@ def main(input_dir: str, output_dir: str, pero_ocr: bool, super_resolution: bool LOGGER.info(f'Pulling image {name}') client.images.pull(name) check_docker_image('scratch') check_docker_image('gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty') check_docker_image('gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-eval') check_docker_image('gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-fileformat') check_docker_image('gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-tesseract') Loading
ahisto_ocr/ocr.py +3 −3 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ def run_tesseract(client, preprocessing_volume, second_pass_volume) -> None: 'mode': 'rw', }, } with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: text = extract_text_file_from_container(container, '/input/list.txt') add_text_file_to_container(container, '/output/list.txt', text) Loading Loading @@ -135,7 +135,7 @@ def run_pero_ocr(client, preprocessing_volume, pero_ocr_volume, gpus: str) -> No run_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-pero', command=command, volumes=volumes, environment={'LC_ALL': 'C'}) with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: text = extract_text_file_from_container(container, '/input/list.txt') add_text_file_to_container(container, '/output/list.txt', text) Loading Loading @@ -165,7 +165,7 @@ def combine_results(client, tesseract_volume, pero_ocr_volume, output_volume): ] run_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-eval', command=command, volumes=volumes) with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: text = extract_text_file_from_container(container, '/pero-ocr/list.txt') add_text_file_to_container(container, '/output/list.txt', text)
ahisto_ocr/preprocessing.py +2 −2 Original line number Diff line number Diff line Loading @@ -34,7 +34,7 @@ def apply_super_resolution(client, input_volume, postprocessing_volume, gpus: st ] run_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/ocr-eval', command=command, volumes=volumes) with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: text = extract_text_file_from_container(container, '/input/list.txt') add_text_file_to_container(container, '/output/list.txt', text) Loading @@ -55,7 +55,7 @@ def apply_super_resolution(client, input_volume, postprocessing_volume, gpus: st environment={'NVIDIA_VISIBLE_DEVICES': gpus}, command=command, volumes=volumes) with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: filenames = [ '{}_noise_scale.png'.format(Path(filename).with_suffix('')) Loading
ahisto_ocr/volume.py +2 −2 Original line number Diff line number Diff line Loading @@ -14,7 +14,7 @@ LOGGER = getLogger(__name__) def copy_input_to(client, volume, input_dir: Path, input_images: List[Path]) -> None: LOGGER.info(f'Copying {len(input_images)} input images from {input_dir}/ to Docker volume {volume.short_id}') volumes = {volume.name: {'bind': '/input', 'mode': 'rw'}} with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: with TemporaryFile('w+b') as tf: with tarfile.open(fileobj=tf, mode='w') as tar: Loading @@ -32,7 +32,7 @@ def copy_input_to(client, volume, input_dir: Path, input_images: List[Path]) -> def copy_output_from(client, volume, input_images: List[Path], output_dir: Path) -> None: LOGGER.info(f'Copying {len(input_images)} OCR texts from Docker volume {volume.short_id} to {output_dir}/') volumes = {volume.name: {'bind': '/output', 'mode': 'ro'}} with create_temporary_docker_container(client, 'scratch', command='cmd', with create_temporary_docker_container(client, 'gitlab.fi.muni.cz:5050/nlp/ahisto-modules/empty', command='cmd', volumes=volumes) as container: with TemporaryFile('w+b') as tf: for chunk in container.get_archive('/output')[0]: Loading