From 243de6ac9048e607cfbd14d2cbc45c57c846d913 Mon Sep 17 00:00:00 2001 From: KevinHuSh <kevinhu.sh@gmail.com> Date: Wed, 10 Apr 2024 11:59:00 +0800 Subject: [PATCH] add a new model for 'Laws' (#290) ### What problem does this PR solve? Issue link:#289 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- README.md | 5 +++-- README_ja.md | 6 +++--- README_zh.md | 5 +++-- rag/app/laws.py | 5 ++++- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index d9639df..95a24c4 100644 --- a/README.md +++ b/README.md @@ -172,8 +172,9 @@ $ docker compose up -d ## 🆕 Latest Features -- Support [Ollama](./docs/ollama.md) for local LLM deployment. -- Support Chinese UI. +- 2023-04-10 Add a new layout recognize model for method 'Laws'. +- 2023-04-08 Support [Ollama](./docs/ollama.md) for local LLM deployment. +- 2023-04-07 Support Chinese UI. ## 📜 Roadmap diff --git a/README_ja.md b/README_ja.md index 8b07c1b..cd5de63 100644 --- a/README_ja.md +++ b/README_ja.md @@ -171,9 +171,9 @@ $ docker compose up -d ``` ## 🆕 最新の新機能 - -- [Ollama](./docs/ollama.md) を使用した大規模モデルのローカライズされたデプロイメントをサポートします。 -- 中国語インターフェースをサポートします。 +- 2023-04-10 メソッド「Laws」に新しいレイアウト認識モデルを追加します。 +- 2023-04-08 [Ollama](./docs/ollama.md) を使用した大規模モデルのローカライズされたデプロイメントをサポートします。 +- 2023-04-07 中国語インターフェースをサポートします。 ## 📜 ロードマップ diff --git a/README_zh.md b/README_zh.md index af1e9d9..e6ed360 100644 --- a/README_zh.md +++ b/README_zh.md @@ -172,8 +172,9 @@ $ docker compose up -d ## 🆕 最近新特性 -- 支持用 [Ollama](./docs/ollama.md) 对大模型进行本地化部署。 -- 支持中文界面。 +- 2023-04-10 为‘Laws’版面分析增加了模型。 +- 2023-04-08 支持用 [Ollama](./docs/ollama.md) 对大模型进行本地化部署。 +- 2023-04-07 支持中文界面。 ## 📜 路线图 diff --git a/rag/app/laws.py b/rag/app/laws.py index 1c99479..4478696 100644 --- 
a/rag/app/laws.py +++ b/rag/app/laws.py @@ -14,6 +14,8 @@ import copy import re from io import BytesIO from docx import Document + +from api.db import ParserType from rag.nlp import bullets_category, is_english, tokenize, remove_contents_table, hierarchical_merge, \ make_colon_as_title, add_positions, tokenize_chunks from rag.nlp import huqie @@ -23,7 +25,8 @@ from rag.settings import cron_logger class Docx(DocxParser): def __init__(self): - pass + self.model_speciess = ParserType.LAWS.value + super().__init__() def __clean(self, line): line = re.sub(r"\u3000", " ", line).strip() -- GitLab