diff --git a/README.md b/README.md index d9639df4a8d5db970dcd2ea6707c3df26f257de5..95a24c4f7164d4d402818397f926d0ebb8001197 100644 --- a/README.md +++ b/README.md @@ -172,8 +172,9 @@ $ docker compose up -d ## 🆕 Latest Features -- Support [Ollama](./docs/ollama.md) for local LLM deployment. -- Support Chinese UI. +- 2023-04-10 Add a new layout recognition model for the 'Laws' method. +- 2023-04-08 Support [Ollama](./docs/ollama.md) for local LLM deployment. +- 2023-04-07 Support Chinese UI. ## 📜 Roadmap diff --git a/README_ja.md b/README_ja.md index 8b07c1b18e9c4b8b7958f2e7f61ea4550718f0ee..cd5de63ff5f1fba07973bd16ee546e3c0ce2eee0 100644 --- a/README_ja.md +++ b/README_ja.md @@ -171,9 +171,9 @@ $ docker compose up -d ``` ## 🆕 最新の新機能 - -- [Ollama](./docs/ollama.md) を使用した大規模モデルのローカライズされたデプロイメントをサポートします。 -- 中国語インターフェースをサポートします。 +- 2023-04-10 メソッド「Laws」に新しいレイアウト認識モデルを追加します。 +- 2023-04-08 [Ollama](./docs/ollama.md) を使用した大規模モデルのローカライズされたデプロイメントをサポートします。 +- 2023-04-07 中国語インターフェースをサポートします。 ## 📜 ロードマップ diff --git a/README_zh.md b/README_zh.md index af1e9d90ed3472d27b19a7f3eb79d1337016032e..e6ed3608c93ec7d5e3b3238c9f01593ed55f1b2e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -172,8 +172,9 @@ $ docker compose up -d ## 🆕 最近新特性 -- 支持用 [Ollama](./docs/ollama.md) 对大模型进行本地化部署。 -- 支持中文界面。 +- 2023-04-10 为‘Laws’版面分析增加了模型。 +- 2023-04-08 支持用 [Ollama](./docs/ollama.md) 对大模型进行本地化部署。 +- 2023-04-07 支持中文界面。 ## 📜 路线图 diff --git a/rag/app/laws.py b/rag/app/laws.py index 1c99479aad8aa23c9b9c196d3f3943c289a9691e..4478696f5744763463063f57635a51133cc15008 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -14,6 +14,8 @@ import copy import re from io import BytesIO from docx import Document + +from api.db import ParserType from rag.nlp import bullets_category, is_english, tokenize, 
remove_contents_table, hierarchical_merge, \ make_colon_as_title, add_positions, tokenize_chunks from rag.nlp import huqie @@ -23,7 +25,8 @@ from rag.settings import cron_logger class Docx(DocxParser): def __init__(self): - pass + self.model_speciess = ParserType.LAWS.value + super().__init__() def __clean(self, line): line = re.sub(r"\u3000", " ", line).strip()