From 739afa391901331341b90dae4ba791f65a361b4c Mon Sep 17 00:00:00 2001 From: Vit Novotny <witiko@mail.muni.cz> Date: Thu, 11 Nov 2021 19:38:11 +0100 Subject: [PATCH] Evaluate language detection on human annotations --- Makefile | 3 +++ ...cr-google-hires.language-detection-page-results-human | 9 +++++++++ ...r-google-lowres.language-detection-page-results-human | 9 +++++++++ output-ocr3+4.language-detection-page-results-human | 9 +++++++++ output-ocr3.language-detection-page-results-human | 9 +++++++++ ...ut-ocr4-onepass.language-detection-page-results-human | 9 +++++++++ 6 files changed, 48 insertions(+) create mode 100644 output-ocr-google-hires.language-detection-page-results-human create mode 100644 output-ocr-google-lowres.language-detection-page-results-human create mode 100644 output-ocr3+4.language-detection-page-results-human create mode 100644 output-ocr3.language-detection-page-results-human create mode 100644 output-ocr4-onepass.language-detection-page-results-human diff --git a/Makefile b/Makefile index bd583700..27e9f8ff 100644 --- a/Makefile +++ b/Makefile @@ -400,6 +400,9 @@ endef %.language-detection-page-results-hires: $(INPUT_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(OUTPUT_OCR_GOOGLE_HIRES_DIRNAME) % $(PYTHON_RUN) scripts.evaluate_language_detection_page $^ $(DETECTED_LANGUAGES) $@ +%.language-detection-page-results-human: $(INPUT_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(LANGUAGE_DETECTION_ANNOTATIONS) % + $(PYTHON_RUN) scripts.evaluate_language_detection_page $^ $(DETECTED_LANGUAGES) $@ annotated + $(addprefix %.detected-languages.,$(THRESHOLDS)): $(INPUT_FILENAMES_FILTERED) % $(PYTHON_RUN) scripts.extract_detected_languages $^ $(DETECTED_LANGUAGES) $@ $(subst .,,$(suffix $@)) diff --git a/output-ocr-google-hires.language-detection-page-results-human b/output-ocr-google-hires.language-detection-page-results-human new file mode 100644 index 00000000..fdbbe895 --- /dev/null +++ b/output-ocr-google-hires.language-detection-page-results-human @@ -0,0 +1,9 @@ +Mean Mean IOU point estimate: 95.76% +Mean Mean IOU 95.00% CI estimate: [94.86%; 96.67%] + +Mean Mean Spearman's rho point estimate: 0.96 +Mean Mean Spearman's rho 95.00% CI estimate: [0.91; 1.01] + +Mean Accuracy@1 point estimate: 99.18% +Mean Accuracy@1 95.00% CI estimate: [97.56%; 100.80%] + diff --git a/output-ocr-google-lowres.language-detection-page-results-human b/output-ocr-google-lowres.language-detection-page-results-human new file mode 100644 index 00000000..53832f99 --- /dev/null +++ b/output-ocr-google-lowres.language-detection-page-results-human @@ -0,0 +1,9 @@ +Mean Mean IOU point estimate: 95.79% +Mean Mean IOU 95.00% CI estimate: [94.86%; 96.71%] + +Mean Mean Spearman's rho point estimate: 0.96 +Mean Mean Spearman's rho 95.00% CI estimate: [0.91; 1.01] + +Mean Accuracy@1 point estimate: 99.18% +Mean Accuracy@1 95.00% CI estimate: [97.56%; 100.80%] + diff --git a/output-ocr3+4.language-detection-page-results-human b/output-ocr3+4.language-detection-page-results-human new file mode 100644 index 00000000..6726f1b8 --- /dev/null +++ b/output-ocr3+4.language-detection-page-results-human @@ -0,0 +1,9 @@ +Mean Mean IOU point estimate: 14.64% +Mean Mean IOU 95.00% CI estimate: [11.52%; 17.76%] + +Mean Mean Spearman's rho point estimate: 0.04 +Mean Mean Spearman's rho 95.00% CI estimate: [-0.07; 0.14] + +Mean Accuracy@1 point estimate: 13.11% +Mean Accuracy@1 95.00% CI estimate: [7.04%; 19.19%] + diff --git a/output-ocr3.language-detection-page-results-human b/output-ocr3.language-detection-page-results-human new file mode 100644 index 00000000..bf5d8b09 --- /dev/null +++ b/output-ocr3.language-detection-page-results-human @@ -0,0 +1,9 @@ +Mean Mean IOU point estimate: 45.69% +Mean Mean IOU 95.00% CI estimate: [39.99%; 51.38%] + +Mean Mean Spearman's rho point estimate: 0.52 +Mean Mean Spearman's rho 95.00% CI estimate: [0.43; 0.61] + +Mean Accuracy@1 point estimate: 53.28% +Mean Accuracy@1 95.00% CI estimate: [44.30%; 62.26%] + diff --git a/output-ocr4-onepass.language-detection-page-results-human b/output-ocr4-onepass.language-detection-page-results-human new file mode 100644 index 00000000..e8077918 --- /dev/null +++ b/output-ocr4-onepass.language-detection-page-results-human @@ -0,0 +1,9 @@ +Mean Mean IOU point estimate: 37.19% +Mean Mean IOU 95.00% CI estimate: [31.87%; 42.52%] + +Mean Mean Spearman's rho point estimate: 0.47 +Mean Mean Spearman's rho 95.00% CI estimate: [0.37; 0.56] + +Mean Accuracy@1 point estimate: 47.54% +Mean Accuracy@1 95.00% CI estimate: [38.55%; 56.53%] + -- GitLab