From 90ba6a12c7d87c6335f8467df8104d50f6e423c3 Mon Sep 17 00:00:00 2001 From: Vit Novotny <witiko@mail.muni.cz> Date: Wed, 24 Aug 2022 21:05:51 +0200 Subject: [PATCH] Do not copy /pero-ocr/*.txt to /output of PERO OCR unless the page is single-column --- scripts/combine_tesseract_with_pero_ocr_docker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/combine_tesseract_with_pero_ocr_docker.py b/scripts/combine_tesseract_with_pero_ocr_docker.py index 5d373042..a530d7d2 100644 --- a/scripts/combine_tesseract_with_pero_ocr_docker.py +++ b/scripts/combine_tesseract_with_pero_ocr_docker.py @@ -31,7 +31,7 @@ def main() -> None: (OUTPUT_PATH / tesseract_filename.stem).with_suffix(f'.{suffix}'), ) - for suffix in ('xml', 'txt'): + for suffix in ('xml', ): shutil.copy( (PERO_OCR_PATH / pero_ocr_filename.stem).with_suffix(f'.{suffix}'), (OUTPUT_PATH / pero_ocr_filename.stem).with_suffix(f'.{suffix}'), -- GitLab