Skip to content
Snippets Groups Projects
Unverified Commit a0a480b7 authored by KevinHuSh, committed by GitHub
Browse files

Continue adding the layout model for 'laws' (#292)

### What problem does this PR solve?

Issue link: #289

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
parent 243de6ac
No related branches found
No related tags found
No related merge requests found
......@@ -25,8 +25,7 @@ from rag.settings import cron_logger
class Docx(DocxParser):
def __init__(self):
    """Initialise the DOCX parser for the 'laws' parser type.

    Stores ``ParserType.LAWS.value`` on ``self.model_speciess`` and then
    runs the base-class initialiser.
    """
    # Assigned before super().__init__() on purpose: presumably the base
    # initialiser inspects this attribute -- confirm before reordering.
    # NOTE(review): the spelling ``model_speciess`` is kept verbatim; it may
    # be the name the base class looks up, so do not "fix" it in isolation.
    self.model_speciess = ParserType.LAWS.value
    super().__init__()
def __clean(self, line):
line = re.sub(r"\u3000", " ", line).strip()
......@@ -52,6 +51,10 @@ class Docx(DocxParser):
class Pdf(PdfParser):
def __init__(self):
    """Set up the PDF parser configured for the 'laws' parser type.

    Records ``ParserType.LAWS.value`` as ``self.model_speciess`` and then
    delegates to the base-class initialiser.
    """
    laws_model = ParserType.LAWS.value
    # Keep the assignment ahead of the base initialiser (original order);
    # presumably the base class reads this attribute -- verify before moving.
    self.model_speciess = laws_model
    super().__init__()
def __call__(self, filename, binary=None, from_page=0,
to_page=100000, zoomin=3, callback=None):
callback(msg="OCR is running...")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment