From 005c77b2dd26625c3f112f0449d158f2dd5800e9 Mon Sep 17 00:00:00 2001
From: Vit Novotny <witiko@mail.muni.cz>
Date: Fri, 12 Nov 2021 15:22:53 +0100
Subject: [PATCH] Add language detection results with both old and new
 algorithms

---
 Makefile                                      | 27 ++++++++++++++-----
 ...language-detection-page-results-hires-olda |  9 +++++++
 ...language-detection-page-results-human-nlda |  0
 ...language-detection-page-results-human-olda |  9 +++++++
 ...anguage-detection-page-results-lowres-olda |  9 +++++++
 ...language-detection-page-results-hires-olda |  9 +++++++
 ...language-detection-page-results-human-nlda |  0
 ...language-detection-page-results-human-olda |  9 +++++++
 ...anguage-detection-page-results-lowres-olda |  9 +++++++
 ...language-detection-page-results-hires-nlda |  0
 ...language-detection-page-results-hires-olda |  9 +++++++
 ...language-detection-page-results-human-nlda |  0
 ...language-detection-page-results-human-olda |  9 +++++++
 ...anguage-detection-page-results-lowres-nlda |  0
 ...anguage-detection-page-results-lowres-olda |  9 +++++++
 ...language-detection-page-results-hires-nlda |  0
 ...language-detection-page-results-hires-olda |  9 +++++++
 ...language-detection-page-results-human-nlda |  0
 ...language-detection-page-results-human-olda |  9 +++++++
 ...anguage-detection-page-results-lowres-nlda |  0
 ...anguage-detection-page-results-lowres-olda |  9 +++++++
 ...language-detection-page-results-hires-nlda |  0
 ...language-detection-page-results-hires-olda |  9 +++++++
 ...language-detection-page-results-human-nlda |  0
 ...language-detection-page-results-human-olda |  9 +++++++
 ...anguage-detection-page-results-lowres-nlda |  0
 ...anguage-detection-page-results-lowres-olda |  9 +++++++
 27 files changed, 156 insertions(+), 6 deletions(-)
 create mode 100644 output-ocr-google-hires.language-detection-page-results-hires-olda
 rename output-ocr-google-hires.language-detection-page-results-human => output-ocr-google-hires.language-detection-page-results-human-nlda (100%)
 create mode 100644 output-ocr-google-hires.language-detection-page-results-human-olda
 create mode 100644 output-ocr-google-hires.language-detection-page-results-lowres-olda
 create mode 100644 output-ocr-google-lowres.language-detection-page-results-hires-olda
 rename output-ocr-google-lowres.language-detection-page-results-human => output-ocr-google-lowres.language-detection-page-results-human-nlda (100%)
 create mode 100644 output-ocr-google-lowres.language-detection-page-results-human-olda
 create mode 100644 output-ocr-google-lowres.language-detection-page-results-lowres-olda
 rename output-ocr3+4.language-detection-page-results-hires => output-ocr3+4.language-detection-page-results-hires-nlda (100%)
 create mode 100644 output-ocr3+4.language-detection-page-results-hires-olda
 rename output-ocr3+4.language-detection-page-results-human => output-ocr3+4.language-detection-page-results-human-nlda (100%)
 create mode 100644 output-ocr3+4.language-detection-page-results-human-olda
 rename output-ocr3+4.language-detection-page-results-lowres => output-ocr3+4.language-detection-page-results-lowres-nlda (100%)
 create mode 100644 output-ocr3+4.language-detection-page-results-lowres-olda
 rename output-ocr3.language-detection-page-results-hires => output-ocr3.language-detection-page-results-hires-nlda (100%)
 create mode 100644 output-ocr3.language-detection-page-results-hires-olda
 rename output-ocr3.language-detection-page-results-human => output-ocr3.language-detection-page-results-human-nlda (100%)
 create mode 100644 output-ocr3.language-detection-page-results-human-olda
 rename output-ocr3.language-detection-page-results-lowres => output-ocr3.language-detection-page-results-lowres-nlda (100%)
 create mode 100644 output-ocr3.language-detection-page-results-lowres-olda
 rename output-ocr4-onepass.language-detection-page-results-hires => output-ocr4-onepass.language-detection-page-results-hires-nlda (100%)
 create mode 100644 output-ocr4-onepass.language-detection-page-results-hires-olda
 rename output-ocr4-onepass.language-detection-page-results-human => output-ocr4-onepass.language-detection-page-results-human-nlda (100%)
 create mode 100644 output-ocr4-onepass.language-detection-page-results-human-olda
 rename output-ocr4-onepass.language-detection-page-results-lowres => output-ocr4-onepass.language-detection-page-results-lowres-nlda (100%)
 create mode 100644 output-ocr4-onepass.language-detection-page-results-lowres-olda

diff --git a/Makefile b/Makefile
index 27e9f8ff..79bd0aca 100644
--- a/Makefile
+++ b/Makefile
@@ -394,14 +394,29 @@ endef
 %.accuracy-results-human: $(INPUT_HUMAN_JUDGEMENTS_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(OUTPUT_RELEVANT_PAGES) $(INPUT_HUMAN_JUDGEMENTS_DIRNAME) %
 	$(PYTHON_RUN) scripts.evaluate_accuracy $^ $@ $@.difficult-pages
 
-%.language-detection-page-results-lowres: $(INPUT_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(OUTPUT_OCR_GOOGLE_LOWRES_DIRNAME) %
-	$(PYTHON_RUN) scripts.evaluate_language_detection_page $^ $(DETECTED_LANGUAGES) $@
+%.accuracy-results-human-with-columns: $(INPUT_HUMAN_JUDGEMENTS_UPSCALED_HIGH_CONFIDENCE_FILENAMES_WITH_COLUMNS) $(OUTPUT_RELEVANT_PAGES) $(INPUT_HUMAN_JUDGEMENTS_WITH_COLUMNS_DIRNAME) %
+	$(PYTHON_RUN) scripts.evaluate_accuracy $^ $@ $@.difficult-pages
+
+%.accuracy-results-human-without-columns: $(INPUT_HUMAN_JUDGEMENTS_UPSCALED_HIGH_CONFIDENCE_FILENAMES_WITHOUT_COLUMNS) $(OUTPUT_RELEVANT_PAGES) $(INPUT_HUMAN_JUDGEMENTS_WITHOUT_COLUMNS_DIRNAME) %
+	$(PYTHON_RUN) scripts.evaluate_accuracy $^ $@ $@.difficult-pages
+
+%.language-detection-page-results-lowres-olda: $(INPUT_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(OUTPUT_OCR_GOOGLE_LOWRES_DIRNAME) %
+	$(PYTHON_RUN) scripts.evaluate_language_detection_page $^ $(DETECTED_LANGUAGES) $@ OLDA OLDA
+
+%.language-detection-page-results-hires-olda: $(INPUT_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(OUTPUT_OCR_GOOGLE_HIRES_DIRNAME) %
+	$(PYTHON_RUN) scripts.evaluate_language_detection_page $^ $(DETECTED_LANGUAGES) $@ OLDA OLDA
+
+%.language-detection-page-results-human-olda: $(INPUT_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(LANGUAGE_DETECTION_ANNOTATIONS) %
+	$(PYTHON_RUN) scripts.evaluate_language_detection_page $^ $(DETECTED_LANGUAGES) $@ annotated OLDA
+
+%.language-detection-page-results-lowres-nlda: $(INPUT_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(OUTPUT_OCR_GOOGLE_LOWRES_DIRNAME) %
+	$(PYTHON_RUN) scripts.evaluate_language_detection_page $^ $(DETECTED_LANGUAGES) $@ OLDA NLDA
 
-%.language-detection-page-results-hires: $(INPUT_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(OUTPUT_OCR_GOOGLE_HIRES_DIRNAME) %
-	$(PYTHON_RUN) scripts.evaluate_language_detection_page $^ $(DETECTED_LANGUAGES) $@
+%.language-detection-page-results-hires-nlda: $(INPUT_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(OUTPUT_OCR_GOOGLE_HIRES_DIRNAME) %
+	$(PYTHON_RUN) scripts.evaluate_language_detection_page $^ $(DETECTED_LANGUAGES) $@ OLDA NLDA
 
-%.language-detection-page-results-human: $(INPUT_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(LANGUAGE_DETECTION_ANNOTATIONS) %
-	$(PYTHON_RUN) scripts.evaluate_language_detection_page $^ $(DETECTED_LANGUAGES) $@ annotated
+%.language-detection-page-results-human-nlda: $(INPUT_UPSCALED_HIGH_CONFIDENCE_FILENAMES) $(LANGUAGE_DETECTION_ANNOTATIONS) %
+	$(PYTHON_RUN) scripts.evaluate_language_detection_page $^ $(DETECTED_LANGUAGES) $@ annotated NLDA
 
 $(addprefix %.detected-languages.,$(THRESHOLDS)): $(INPUT_FILENAMES_FILTERED) %
 	$(PYTHON_RUN) scripts.extract_detected_languages $^ $(DETECTED_LANGUAGES) $@ $(subst .,,$(suffix $@))
diff --git a/output-ocr-google-hires.language-detection-page-results-hires-olda b/output-ocr-google-hires.language-detection-page-results-hires-olda
new file mode 100644
index 00000000..147c2319
--- /dev/null
+++ b/output-ocr-google-hires.language-detection-page-results-hires-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 100.00%
+Mean Mean IOU 95.00% CI estimate: [nan%; nan%]
+
+Mean Mean Spearman's rho point estimate: 1.00
+Mean Mean Spearman's rho 95.00% CI estimate: [1.00; 1.00]
+
+Mean Accuracy@1 point estimate: 100.00%
+Mean Accuracy@1 95.00% CI estimate: [nan%; nan%]
+
diff --git a/output-ocr-google-hires.language-detection-page-results-human b/output-ocr-google-hires.language-detection-page-results-human-nlda
similarity index 100%
rename from output-ocr-google-hires.language-detection-page-results-human
rename to output-ocr-google-hires.language-detection-page-results-human-nlda
diff --git a/output-ocr-google-hires.language-detection-page-results-human-olda b/output-ocr-google-hires.language-detection-page-results-human-olda
new file mode 100644
index 00000000..f1350096
--- /dev/null
+++ b/output-ocr-google-hires.language-detection-page-results-human-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 92.42%
+Mean Mean IOU 95.00% CI estimate: [91.07%; 93.77%]
+
+Mean Mean Spearman's rho point estimate: 0.90
+Mean Mean Spearman's rho 95.00% CI estimate: [0.85; 0.95]
+
+Mean Accuracy@1 point estimate: 99.18%
+Mean Accuracy@1 95.00% CI estimate: [97.56%; 100.80%]
+
diff --git a/output-ocr-google-hires.language-detection-page-results-lowres-olda b/output-ocr-google-hires.language-detection-page-results-lowres-olda
new file mode 100644
index 00000000..64b646ff
--- /dev/null
+++ b/output-ocr-google-hires.language-detection-page-results-lowres-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 98.03%
+Mean Mean IOU 95.00% CI estimate: [98.00%; 98.06%]
+
+Mean Mean Spearman's rho point estimate: 0.98
+Mean Mean Spearman's rho 95.00% CI estimate: [0.97; 0.98]
+
+Mean Accuracy@1 point estimate: 99.48%
+Mean Accuracy@1 95.00% CI estimate: [99.42%; 99.53%]
+
diff --git a/output-ocr-google-lowres.language-detection-page-results-hires-olda b/output-ocr-google-lowres.language-detection-page-results-hires-olda
new file mode 100644
index 00000000..64b646ff
--- /dev/null
+++ b/output-ocr-google-lowres.language-detection-page-results-hires-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 98.03%
+Mean Mean IOU 95.00% CI estimate: [98.00%; 98.06%]
+
+Mean Mean Spearman's rho point estimate: 0.98
+Mean Mean Spearman's rho 95.00% CI estimate: [0.97; 0.98]
+
+Mean Accuracy@1 point estimate: 99.48%
+Mean Accuracy@1 95.00% CI estimate: [99.42%; 99.53%]
+
diff --git a/output-ocr-google-lowres.language-detection-page-results-human b/output-ocr-google-lowres.language-detection-page-results-human-nlda
similarity index 100%
rename from output-ocr-google-lowres.language-detection-page-results-human
rename to output-ocr-google-lowres.language-detection-page-results-human-nlda
diff --git a/output-ocr-google-lowres.language-detection-page-results-human-olda b/output-ocr-google-lowres.language-detection-page-results-human-olda
new file mode 100644
index 00000000..524d71fb
--- /dev/null
+++ b/output-ocr-google-lowres.language-detection-page-results-human-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 92.39%
+Mean Mean IOU 95.00% CI estimate: [90.94%; 93.85%]
+
+Mean Mean Spearman's rho point estimate: 0.88
+Mean Mean Spearman's rho 95.00% CI estimate: [0.82; 0.93]
+
+Mean Accuracy@1 point estimate: 99.18%
+Mean Accuracy@1 95.00% CI estimate: [97.56%; 100.80%]
+
diff --git a/output-ocr-google-lowres.language-detection-page-results-lowres-olda b/output-ocr-google-lowres.language-detection-page-results-lowres-olda
new file mode 100644
index 00000000..147c2319
--- /dev/null
+++ b/output-ocr-google-lowres.language-detection-page-results-lowres-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 100.00%
+Mean Mean IOU 95.00% CI estimate: [nan%; nan%]
+
+Mean Mean Spearman's rho point estimate: 1.00
+Mean Mean Spearman's rho 95.00% CI estimate: [1.00; 1.00]
+
+Mean Accuracy@1 point estimate: 100.00%
+Mean Accuracy@1 95.00% CI estimate: [nan%; nan%]
+
diff --git a/output-ocr3+4.language-detection-page-results-hires b/output-ocr3+4.language-detection-page-results-hires-nlda
similarity index 100%
rename from output-ocr3+4.language-detection-page-results-hires
rename to output-ocr3+4.language-detection-page-results-hires-nlda
diff --git a/output-ocr3+4.language-detection-page-results-hires-olda b/output-ocr3+4.language-detection-page-results-hires-olda
new file mode 100644
index 00000000..fa1749f0
--- /dev/null
+++ b/output-ocr3+4.language-detection-page-results-hires-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 19.61%
+Mean Mean IOU 95.00% CI estimate: [19.43%; 19.79%]
+
+Mean Mean Spearman's rho point estimate: -0.12
+Mean Mean Spearman's rho 95.00% CI estimate: [-0.12; -0.11]
+
+Mean Accuracy@1 point estimate: 24.55%
+Mean Accuracy@1 95.00% CI estimate: [24.22%; 24.88%]
+
diff --git a/output-ocr3+4.language-detection-page-results-human b/output-ocr3+4.language-detection-page-results-human-nlda
similarity index 100%
rename from output-ocr3+4.language-detection-page-results-human
rename to output-ocr3+4.language-detection-page-results-human-nlda
diff --git a/output-ocr3+4.language-detection-page-results-human-olda b/output-ocr3+4.language-detection-page-results-human-olda
new file mode 100644
index 00000000..e95d7ecf
--- /dev/null
+++ b/output-ocr3+4.language-detection-page-results-human-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 10.36%
+Mean Mean IOU 95.00% CI estimate: [7.14%; 13.59%]
+
+Mean Mean Spearman's rho point estimate: -0.14
+Mean Mean Spearman's rho 95.00% CI estimate: [-0.23; -0.05]
+
+Mean Accuracy@1 point estimate: 11.48%
+Mean Accuracy@1 95.00% CI estimate: [5.74%; 17.21%]
+
diff --git a/output-ocr3+4.language-detection-page-results-lowres b/output-ocr3+4.language-detection-page-results-lowres-nlda
similarity index 100%
rename from output-ocr3+4.language-detection-page-results-lowres
rename to output-ocr3+4.language-detection-page-results-lowres-nlda
diff --git a/output-ocr3+4.language-detection-page-results-lowres-olda b/output-ocr3+4.language-detection-page-results-lowres-olda
new file mode 100644
index 00000000..1cbe7fcd
--- /dev/null
+++ b/output-ocr3+4.language-detection-page-results-lowres-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 19.60%
+Mean Mean IOU 95.00% CI estimate: [19.42%; 19.78%]
+
+Mean Mean Spearman's rho point estimate: -0.12
+Mean Mean Spearman's rho 95.00% CI estimate: [-0.12; -0.12]
+
+Mean Accuracy@1 point estimate: 24.53%
+Mean Accuracy@1 95.00% CI estimate: [24.20%; 24.86%]
+
diff --git a/output-ocr3.language-detection-page-results-hires b/output-ocr3.language-detection-page-results-hires-nlda
similarity index 100%
rename from output-ocr3.language-detection-page-results-hires
rename to output-ocr3.language-detection-page-results-hires-nlda
diff --git a/output-ocr3.language-detection-page-results-hires-olda b/output-ocr3.language-detection-page-results-hires-olda
new file mode 100644
index 00000000..6b3d1ec8
--- /dev/null
+++ b/output-ocr3.language-detection-page-results-hires-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 48.26%
+Mean Mean IOU 95.00% CI estimate: [48.02%; 48.50%]
+
+Mean Mean Spearman's rho point estimate: 0.36
+Mean Mean Spearman's rho 95.00% CI estimate: [0.36; 0.36]
+
+Mean Accuracy@1 point estimate: 65.01%
+Mean Accuracy@1 95.00% CI estimate: [64.64%; 65.38%]
+
diff --git a/output-ocr3.language-detection-page-results-human b/output-ocr3.language-detection-page-results-human-nlda
similarity index 100%
rename from output-ocr3.language-detection-page-results-human
rename to output-ocr3.language-detection-page-results-human-nlda
diff --git a/output-ocr3.language-detection-page-results-human-olda b/output-ocr3.language-detection-page-results-human-olda
new file mode 100644
index 00000000..c00c2faa
--- /dev/null
+++ b/output-ocr3.language-detection-page-results-human-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 27.97%
+Mean Mean IOU 95.00% CI estimate: [23.03%; 32.92%]
+
+Mean Mean Spearman's rho point estimate: 0.26
+Mean Mean Spearman's rho 95.00% CI estimate: [0.17; 0.35]
+
+Mean Accuracy@1 point estimate: 42.62%
+Mean Accuracy@1 95.00% CI estimate: [33.72%; 51.52%]
+
diff --git a/output-ocr3.language-detection-page-results-lowres b/output-ocr3.language-detection-page-results-lowres-nlda
similarity index 100%
rename from output-ocr3.language-detection-page-results-lowres
rename to output-ocr3.language-detection-page-results-lowres-nlda
diff --git a/output-ocr3.language-detection-page-results-lowres-olda b/output-ocr3.language-detection-page-results-lowres-olda
new file mode 100644
index 00000000..6add29b1
--- /dev/null
+++ b/output-ocr3.language-detection-page-results-lowres-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 48.25%
+Mean Mean IOU 95.00% CI estimate: [48.01%; 48.49%]
+
+Mean Mean Spearman's rho point estimate: 0.36
+Mean Mean Spearman's rho 95.00% CI estimate: [0.36; 0.36]
+
+Mean Accuracy@1 point estimate: 65.00%
+Mean Accuracy@1 95.00% CI estimate: [64.63%; 65.36%]
+
diff --git a/output-ocr4-onepass.language-detection-page-results-hires b/output-ocr4-onepass.language-detection-page-results-hires-nlda
similarity index 100%
rename from output-ocr4-onepass.language-detection-page-results-hires
rename to output-ocr4-onepass.language-detection-page-results-hires-nlda
diff --git a/output-ocr4-onepass.language-detection-page-results-hires-olda b/output-ocr4-onepass.language-detection-page-results-hires-olda
new file mode 100644
index 00000000..6797db13
--- /dev/null
+++ b/output-ocr4-onepass.language-detection-page-results-hires-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 49.84%
+Mean Mean IOU 95.00% CI estimate: [49.60%; 50.08%]
+
+Mean Mean Spearman's rho point estimate: 0.33
+Mean Mean Spearman's rho 95.00% CI estimate: [0.32; 0.33]
+
+Mean Accuracy@1 point estimate: 66.49%
+Mean Accuracy@1 95.00% CI estimate: [66.13%; 66.85%]
+
diff --git a/output-ocr4-onepass.language-detection-page-results-human b/output-ocr4-onepass.language-detection-page-results-human-nlda
similarity index 100%
rename from output-ocr4-onepass.language-detection-page-results-human
rename to output-ocr4-onepass.language-detection-page-results-human-nlda
diff --git a/output-ocr4-onepass.language-detection-page-results-human-olda b/output-ocr4-onepass.language-detection-page-results-human-olda
new file mode 100644
index 00000000..2dada04f
--- /dev/null
+++ b/output-ocr4-onepass.language-detection-page-results-human-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 26.18%
+Mean Mean IOU 95.00% CI estimate: [21.13%; 31.23%]
+
+Mean Mean Spearman's rho point estimate: 0.22
+Mean Mean Spearman's rho 95.00% CI estimate: [0.12; 0.32]
+
+Mean Accuracy@1 point estimate: 36.89%
+Mean Accuracy@1 95.00% CI estimate: [28.20%; 45.57%]
+
diff --git a/output-ocr4-onepass.language-detection-page-results-lowres b/output-ocr4-onepass.language-detection-page-results-lowres-nlda
similarity index 100%
rename from output-ocr4-onepass.language-detection-page-results-lowres
rename to output-ocr4-onepass.language-detection-page-results-lowres-nlda
diff --git a/output-ocr4-onepass.language-detection-page-results-lowres-olda b/output-ocr4-onepass.language-detection-page-results-lowres-olda
new file mode 100644
index 00000000..b05745f1
--- /dev/null
+++ b/output-ocr4-onepass.language-detection-page-results-lowres-olda
@@ -0,0 +1,9 @@
+Mean Mean IOU point estimate: 49.84%
+Mean Mean IOU 95.00% CI estimate: [49.60%; 50.08%]
+
+Mean Mean Spearman's rho point estimate: 0.33
+Mean Mean Spearman's rho 95.00% CI estimate: [0.32; 0.33]
+
+Mean Accuracy@1 point estimate: 66.48%
+Mean Accuracy@1 95.00% CI estimate: [66.12%; 66.84%]
+
-- 
GitLab