diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 67b9d172c7be7cbf1eff4e4488e87bc414c0808b..4c3255c706758027d262066099e414bed881f501 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -470,7 +470,8 @@ class RAGFlowPdfParser: continue if re.match(r"[0-9]{2,3}/[0-9]{3}$", up["text"]) \ - or re.match(r"[0-9]{2,3}/[0-9]{3}$", down["text"]): + or re.match(r"[0-9]{2,3}/[0-9]{3}$", down["text"]) \ + or not down["text"].strip(): i += 1 continue