diff --git a/README.md b/README.md
index d720f75fa41c5ba46b20b3942aec2018ab3e8cd8..2e858d855ba3b68662b5116dbc429f68ab522dea 100644
--- a/README.md
+++ b/README.md
@@ -88,8 +88,8 @@ If your machine doesn't have *Docker* installed, please refer to [Install Docker
 > In **user_default_llm** of [service_conf.yaml](./docker/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_.
 > It's O.K if you don't have _API_KEY_ at the moment, you can specify it later at the setting part after starting and logging in the system.
 > - We have supported the flowing LLM factory, and the others is coming soon: 
-> [OpenAI](https://platform.openai.com/login?launch), [通义千问/QWen](https://dashscope.console.aliyun.com/model), 
-> [智谱AI/ZhipuAI](https://open.bigmodel.cn/)
+> [OpenAI](https://platform.openai.com/login?launch), [Tongyi-Qianwen](https://dashscope.console.aliyun.com/model), 
+> [ZHIPU-AI](https://open.bigmodel.cn/), [Moonshot](https://platform.moonshot.cn/docs/docs)
 ```bash
 121:/# git clone https://github.com/infiniflow/ragflow.git
 121:/# cd ragflow/docker
diff --git a/api/db/__init__.py b/api/db/__init__.py
index c1f5d8083d7b466af9d02fb7ad143ac010fedfa2..1ba7938e094e13c1fdbff90edea296e68f7729ea 100644
--- a/api/db/__init__.py
+++ b/api/db/__init__.py
@@ -79,3 +79,4 @@ class ParserType(StrEnum):
     TABLE = "table"
     NAIVE = "naive"
     PICTURE = "picture"
+    ONE = "one"
diff --git a/api/db/init_data.py b/api/db/init_data.py
index a930fb4ab1073484897eb1e08c57af0c02fea152..3418bcfdcf820826d911d6f18daf813c296bcd3c 100644
--- a/api/db/init_data.py
+++ b/api/db/init_data.py
@@ -79,12 +79,12 @@ factory_infos = [{
         "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
         "status": "1",
     },{
-        "name": "通义千问",
+        "name": "Tongyi-Qianwen",
         "logo": "",
         "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
         "status": "1",
     },{
-        "name": "智谱AI",
+        "name": "ZHIPU-AI",
         "logo": "",
         "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
         "status": "1",
@@ -270,6 +270,14 @@ def init_llm_factory():
         except Exception as e:
             pass
 
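+    """Manual migration notes (this bare string is never executed). SQL to run by hand on a database that still holds the old factory names: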
+    drop table llm;
+    drop table factories;
+    update tenant_llm set llm_factory='Tongyi-Qianwen' where llm_factory='通义千问';
+    update tenant_llm set llm_factory='ZHIPU-AI' where llm_factory='智谱AI';
+    update tenant set parser_ids='naive:General,one:One,qa:Q&A,resume:Resume,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture';
+    """
+
 
 def init_web_data():
     start_time = time.time()
diff --git a/api/settings.py b/api/settings.py
index b2fe8d8e83b1064666e6c5f2d13b7d362a6f41af..030d1983dc7735ab4d02b0c6a14043ccfedace72 100644
--- a/api/settings.py
+++ b/api/settings.py
@@ -52,7 +52,7 @@ REQUEST_MAX_WAIT_SEC = 300
 USE_REGISTRY = get_base_config("use_registry")
 
 default_llm = {
-    "通义千问": {
+    "Tongyi-Qianwen": {
         "chat_model": "qwen-plus",
         "embedding_model": "text-embedding-v2",
         "image2text_model": "qwen-vl-max",
@@ -64,7 +64,7 @@ default_llm = {
         "image2text_model": "gpt-4-vision-preview",
         "asr_model": "whisper-1",
     },
-    "智谱AI": {
+    "ZHIPU-AI": {
         "chat_model": "glm-3-turbo",
         "embedding_model": "embedding-2",
         "image2text_model": "glm-4v",
@@ -84,17 +84,17 @@ default_llm = {
     }
 }
 LLM = get_base_config("user_default_llm", {})
-LLM_FACTORY = LLM.get("factory", "通义千问")
+LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
 if LLM_FACTORY not in default_llm:
-    print("\33[91m【ERROR】\33[0m:", f"LLM factory {LLM_FACTORY} has not supported yet, switch to '通义千问/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
-    LLM_FACTORY = "通义千问"
+    print("\33[91m【ERROR】\33[0m:", f"LLM factory {LLM_FACTORY} has not supported yet, switch to 'Tongyi-Qianwen/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
+    LLM_FACTORY = "Tongyi-Qianwen"
 CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
 EMBEDDING_MDL = default_llm[LLM_FACTORY]["embedding_model"]
 ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
 IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
 
 API_KEY = LLM.get("api_key", "")
-PARSERS = LLM.get("parsers", "naive:General,qa:Q&A,resume:Resume,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")
+PARSERS = LLM.get("parsers", "naive:General,one:One,qa:Q&A,resume:Resume,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")
 
 # distribution
 DEPENDENT_DISTRIBUTION = get_base_config("dependent_distribution", False)
diff --git a/rag/app/manual.py b/rag/app/manual.py
index b8b4d7a16aca2226c62eb70ede3d3111b1cb650e..7ca5451971d896af54a1cdce79717dc98bbca3d6 100644
--- a/rag/app/manual.py
+++ b/rag/app/manual.py
@@ -57,7 +57,7 @@ class Pdf(PdfParser):
         sec_ids = []
         sid = 0
         for i, lvl in enumerate(levels):
-            if lvl <= most_level: sid += 1
+            if lvl <= most_level and i > 0 and lvl != levels[i-1]: sid += 1
             sec_ids.append(sid)
             #print(lvl, self.boxes[i]["text"], most_level)
 
@@ -75,7 +75,7 @@ class Pdf(PdfParser):
                     continue
             chunks.append(txt + poss)
             if sec_id >-1: last_sid = sec_id
-        return chunks
+        return chunks, tbls
 
 
 def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, **kwargs):
@@ -86,7 +86,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
 
     if re.search(r"\.pdf$", filename, re.IGNORECASE):
         pdf_parser = Pdf()
-        cks = pdf_parser(filename if not binary else binary,
+        cks, tbls = pdf_parser(filename if not binary else binary,
                            from_page=from_page, to_page=to_page, callback=callback)
     else: raise NotImplementedError("file type not supported yet(pdf supported)")
     doc = {
@@ -100,7 +100,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
     i = 0
     chunk = []
     tk_cnt = 0
-    res = []
+    res = tokenize_table(tbls, doc, eng)
     def add_chunk():
         nonlocal chunk, res, doc, pdf_parser, tk_cnt
         d = copy.deepcopy(doc)
diff --git a/rag/app/naive.py b/rag/app/naive.py
index 4c82e56632f3c1a5e16836c6196d5bbd638ca64f..230f96784466ea534575b0af0d74798aaf86c392 100644
--- a/rag/app/naive.py
+++ b/rag/app/naive.py
@@ -49,7 +49,7 @@ class Pdf(PdfParser):
 
 def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, **kwargs):
     """
-        Supported file formats are docx, pdf, txt.
+        Supported file formats are docx, pdf, excel, txt.
         This method apply the naive ways to chunk files.
         Successive text will be sliced into pieces using 'delimiter'.
         Next, these successive pieces are merge into chunks whose token number is no more than 'Max token number'.
diff --git a/rag/app/one.py b/rag/app/one.py
new file mode 100644
index 0000000000000000000000000000000000000000..d43961a48718234615567cca9a746836cc3b8f4c
--- /dev/null
+++ b/rag/app/one.py
@@ -0,0 +1,108 @@
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import copy
+import re
+from rag.app import laws
+from rag.nlp import huqie, is_english, tokenize, naive_merge, tokenize_table, add_positions
+from deepdoc.parser import PdfParser, ExcelParser
+from rag.settings import cron_logger
+
+
+class Pdf(PdfParser):
+    """PDF parser whose call returns box texts and table rows as a list sorted by position."""
+
+    def __call__(self, filename, binary=None, from_page=0,
+                 to_page=100000, zoomin=3, callback=None):
+        from timeit import default_timer as timer
+
+        callback(msg="OCR is  running...")
+        source = binary if binary else filename
+        self.__images__(source, zoomin, from_page, to_page, callback)
+        callback(msg="OCR finished")
+
+        started = timer()
+        self._layouts_rec(zoomin)
+        callback(0.63, "Layout analysis finished.")
+        print("paddle layouts:", timer() - started)
+        self._table_transformer_job(zoomin)
+        callback(0.65, "Table analysis finished.")
+        self._text_merge()
+        callback(0.67, "Text merging finished")
+        tbls = self._extract_table_figure(True, zoomin, True, True)
+        self._concat_downward()
+
+        # Pair every text with its positions; the first position is the sort key below.
+        sections = [(box["text"], self.get_position(box, zoomin)) for box in self.boxes]
+        for (_, rows), poss in tbls:
+            text = rows if isinstance(rows, str) else rows[0]
+            sections.append((text, [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
+        sections.sort(key=lambda sec: (sec[1][0][0], sec[1][0][3], sec[1][0][1]))
+        return [text for text, _ in sections]
+
+
+def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, **kwargs):
+    """
+        Supported file formats are docx, pdf, excel, txt.
+        One file forms a chunk which maintains original text order.
+
+        Returns a one-element list holding the document dict passed to tokenize().
+        Raises NotImplementedError for any other file extension.
+    """
+
+    eng = lang.lower() == "english"#is_english(cks)
+
+    sections = []
+    if re.search(r"\.docx?$", filename, re.IGNORECASE):
+        callback(0.1, "Start to parse.")
+        for txt in laws.Docx()(filename, binary):
+            sections.append(txt)
+        callback(0.8, "Finish parsing.")
+    elif re.search(r"\.pdf$", filename, re.IGNORECASE):
+        pdf_parser = Pdf()
+        sections = pdf_parser(filename if not binary else binary, to_page=to_page, callback=callback)
+    elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
+        callback(0.1, "Start to parse.")
+        excel_parser = ExcelParser()
+        sections = [excel_parser.html(binary)]
+    elif re.search(r"\.txt$", filename, re.IGNORECASE):
+        callback(0.1, "Start to parse.")
+        if binary:
+            txt = binary.decode("utf-8")
+        else:
+            with open(filename, "r") as f:
+                txt = f.read()
+        # Keep plain strings: the sections are joined with "\n" below,
+        # and str.join() raises TypeError when handed tuples.
+        sections = [line for line in txt.split("\n") if line]
+        callback(0.8, "Finish parsing.")
+    else:
+        raise NotImplementedError("file type not supported yet(docx, pdf, excel, txt supported)")
+
+    doc = {
+        "docnm_kwd": filename,
+        "title_tks": huqie.qie(re.sub(r"\.[a-zA-Z]+$", "", filename))
+    }
+    doc["title_sm_tks"] = huqie.qieqie(doc["title_tks"])
+    tokenize(doc, "\n".join(sections), eng)
+    return [doc]
+
+
+if __name__ == "__main__":
+    # Manual smoke run: chunk the file named by the first CLI argument.
+    import sys
+
+
+    def _ignore_progress(prog=None, msg=""):
+        """Progress callback that discards every update."""
+
+    chunk(sys.argv[1], from_page=0, to_page=10, callback=_ignore_progress)
diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
index cc4e46269a8ec6018f9f7b452fe4d1819bb148d6..74a8dbf88b4723ea707585186e718f7c45f5d71b 100644
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -21,8 +21,8 @@ from .cv_model import *
 EmbeddingModel = {
     "Local": HuEmbedding,
     "OpenAI": OpenAIEmbed,
-    "通义千问": HuEmbedding, #QWenEmbed,
-    "智谱AI": ZhipuEmbed,
+    "Tongyi-Qianwen": HuEmbedding, #QWenEmbed,
+    "ZHIPU-AI": ZhipuEmbed,
     "Moonshot": HuEmbedding
 }
 
@@ -30,16 +30,16 @@ EmbeddingModel = {
 CvModel = {
     "OpenAI": GptV4,
     "Local": LocalCV,
-    "通义千问": QWenCV,
-    "智谱AI": Zhipu4V,
+    "Tongyi-Qianwen": QWenCV,
+    "ZHIPU-AI": Zhipu4V,
     "Moonshot": LocalCV
 }
 
 
 ChatModel = {
     "OpenAI": GptTurbo,
-    "智谱AI": ZhipuChat,
-    "通义千问": QWenChat,
+    "ZHIPU-AI": ZhipuChat,
+    "Tongyi-Qianwen": QWenChat,
     "Local": LocalLLM,
     "Moonshot": MoonshotChat
 }
diff --git a/rag/nlp/search.py b/rag/nlp/search.py
index f9fbcf25e5dd199eaf474e5c0ded977bc52f4c0a..9f89cd5ab4369bf98ff5ca63438e39d732565634 100644
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -194,7 +194,7 @@ class Dealer:
         return [float(t) for t in txt.split("\t")]
 
     def insert_citations(self, answer, chunks, chunk_v,
-                         embd_mdl, tkweight=0.7, vtweight=0.3):
+                         embd_mdl, tkweight=0.1, vtweight=0.9):
         assert len(chunks) == len(chunk_v)
         pieces = re.split(r"(```)", answer)
         if len(pieces) >= 3:
@@ -243,7 +243,7 @@ class Dealer:
                                                             chunks_tks,
                                                             tkweight, vtweight)
             mx = np.max(sim) * 0.99
-            if mx < 0.7:
+            if mx < 0.65:
                 continue
             cites[idx[i]] = list(
                 set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
diff --git a/rag/svr/task_broker.py b/rag/svr/task_broker.py
index 665ab1e137eab559c558e06cb3e45bae31342525..62f0d0767259c43a08d87fbc2450b79f2cd52377 100644
--- a/rag/svr/task_broker.py
+++ b/rag/svr/task_broker.py
@@ -84,6 +84,7 @@ def dispatch():
             pages = PdfParser.total_page_number(r["name"], MINIO.get(r["kb_id"], r["location"]))
             page_size = 5
             if r["parser_id"] == "paper": page_size = 12
+            if r["parser_id"] == "one": page_size = 1000000000
             for s,e in r["parser_config"].get("pages", [(0,100000)]):
                 e = min(e, pages)
                 for p in range(s, e, page_size):
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index f8438e18ed3a99a388c30ff48f37cda8aaa29768..f88faf7fd67eacb90b0a138412858ae0487feff7 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -39,7 +39,7 @@ from rag.nlp import search
 from io import BytesIO
 import pandas as pd
 
-from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive
+from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive, one
 
 from api.db import LLMType, ParserType
 from api.db.services.document_service import DocumentService
@@ -60,6 +60,7 @@ FACTORY = {
     ParserType.TABLE.value: table,
     ParserType.RESUME.value: resume,
     ParserType.PICTURE.value: picture,
+    ParserType.ONE.value: one,
 }