From 37185466e231632c39a150f2dde90d99d530db28 Mon Sep 17 00:00:00 2001
From: KevinHuSh <kevinhu.sh@gmail.com>
Date: Wed, 27 Mar 2024 13:14:36 +0800
Subject: [PATCH] READEME refined (#156)

---
 README.md         | 27 ++++++++++++++-------------
 rag/app/naive.py  |  6 ++++--
 rag/nlp/search.py |  2 +-
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 71e0316..c3123cc 100644
--- a/README.md
+++ b/README.md
@@ -79,10 +79,6 @@ vm.max_map_count=262144
 If your machine doesn't have *Docker* installed, please refer to [Install Docker Engine](https://docs.docker.com/engine/install/)
 
 ## Quick Start
-> If you want to change the basic setups, like port, password .etc., please refer to [.env](./docker/.env) before starting the system.
-
-> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./docker/service_conf.yaml) which is a
-> configuration of the back-end service and should be consistent with [.env](./docker/.env).
 
 > - In [service_conf.yaml](./docker/service_conf.yaml), configuration of *LLM* in **user_default_llm** is strongly recommended.
 > In **user_default_llm** of [service_conf.yaml](./docker/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_.
@@ -91,25 +87,25 @@ If your machine doesn't have *Docker* installed, please refer to [Install Docker
 > [OpenAI](https://platform.openai.com/login?launch), [Tongyi-Qianwen](https://dashscope.console.aliyun.com/model),
 > [ZHIPU-AI](https://open.bigmodel.cn/), [Moonshot](https://platform.moonshot.cn/docs/docs)
 
 ```bash
-121:/# git clone https://github.com/infiniflow/ragflow.git
-121:/# cd ragflow/docker
-121:/ragflow/docker# docker compose up -d
+$ git clone https://github.com/infiniflow/ragflow.git
+$ cd ragflow/docker
+$ docker compose up -d
 ```
 ### OR
 
 ```bash
-121:/# git clone https://github.com/infiniflow/ragflow.git
-121:/# cd ragflow/
-121:/# docker build -t infiniflow/ragflow:v1.0 .
-121:/# cd ragflow/docker
-121:/ragflow/docker# docker compose up -d
+$ git clone https://github.com/infiniflow/ragflow.git
+$ cd ragflow/
+$ docker build -t infiniflow/ragflow:v1.0 .
+$ cd ragflow/docker
+$ docker compose up -d
 ```
 > The core image is about 15GB, please be patient for the first time
 
 After pulling all the images and running up, use the following command to check the server status. If you can have the following outputs, _**Hallelujah!**_ You have successfully launched the system.
 ```bash
-121:/ragflow# docker logs -f ragflow-server
+$ docker logs -f ragflow-server
 
     ____                 ______ __
    / __ \ ____ _ ____ _ / ____// /____  _      __
@@ -139,6 +135,11 @@ If you need to change the default setting of the system when you deploy it. Ther
 Please refer to [README](./docker/README.md) and manually set the configuration.
 After changing something, please run *docker-compose up -d* again.
 
+> If you want to change the basic setups, like port, password .etc., please refer to [.env](./docker/.env) before starting the system.
+
+> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./docker/service_conf.yaml) which is a
+> configuration of the back-end service and should be consistent with [.env](./docker/.env).
+
 # RoadMap
 
 - [ ] File manager.
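The README hunks above swap host-specific shell prompts for plain `$` prompts and move the `.env` / `service_conf.yaml` notes below the quick-start commands; the workflow itself (clone, optionally build the ~15 GB image, `docker compose up -d`, then tail the `ragflow-server` logs) is unchanged. As a small illustration of the "check the server status" step, here is a minimal sketch that polls the web service until it answers. The host, port (80), and timeout are assumptions, not values taken from the patch; adjust them to whatever your `docker/.env` exposes.

```python
import time
import urllib.request
from urllib.error import URLError

# Minimal sketch: poll the RAGFlow web endpoint until it responds.
# Assumes the default compose setup exposes the web UI on localhost:80;
# change HOST/PORT to match the values configured in docker/.env.
HOST, PORT = "localhost", 80
DEADLINE = time.time() + 300  # allow time for the large image to pull and start

while time.time() < DEADLINE:
    try:
        with urllib.request.urlopen(f"http://{HOST}:{PORT}", timeout=5) as resp:
            print(f"RAGFlow answered with HTTP {resp.status}")
            break
    except (URLError, OSError):
        time.sleep(5)  # not up yet; retry until the deadline expires
else:
    print("Server did not come up in time; check `docker logs -f ragflow-server`.")
```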
diff --git a/rag/app/naive.py b/rag/app/naive.py
index 6dad8a2..dc8726f 100644
--- a/rag/app/naive.py
+++ b/rag/app/naive.py
@@ -42,7 +42,9 @@ class Pdf(PdfParser):
         self._text_merge()
         callback(0.67, "Text merging finished")
         tbls = self._extract_table_figure(True, zoomin, True, True)
-        self._naive_vertical_merge()
+        #self._naive_vertical_merge()
+        self._concat_downward()
+        #self._filter_forpages()
 
         cron_logger.info("paddle layouts:".format(
             (timer() - start) / (self.total_page + 0.1)))
@@ -79,7 +81,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
 
     elif re.search(r"\.pdf$", filename, re.IGNORECASE):
         pdf_parser = Pdf(
-        ) if parser_config["layout_recognize"] else PlainParser()
+        ) if parser_config.get("layout_recognize", True) else PlainParser()
         sections, tbls = pdf_parser(filename if not binary else binary,
                                     from_page=from_page, to_page=to_page, callback=callback)
         res = tokenize_table(tbls, doc, eng)
diff --git a/rag/nlp/search.py b/rag/nlp/search.py
index a3a644f..cc9f533 100644
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -7,7 +7,6 @@ from elasticsearch_dsl import Q, Search
 from typing import List, Optional, Dict, Union
 from dataclasses import dataclass
 
-from api.settings import chat_logger
 from rag.settings import es_logger
 from rag.utils import rmSpace
 from rag.nlp import huqie, query
@@ -365,6 +364,7 @@ class Dealer:
         return ranks
 
     def sql_retrieval(self, sql, fetch_size=128, format="json"):
+        from api.settings import chat_logger
         sql = re.sub(r"[ ]+", " ", sql)
         sql = sql.replace("%", "")
         es_logger.info(f"Get es sql: {sql}")
--
GitLab
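The two Python changes above are small robustness tweaks: `parser_config.get("layout_recognize", True)` keeps PDF parsing layout-aware by default instead of raising `KeyError` when a caller omits the key, and the `chat_logger` import is moved from module level into `sql_retrieval`, a common way to avoid circular imports or unnecessary work at import time (the patch itself does not state its motivation). Below is a minimal, self-contained sketch of both patterns; the function names and the `logging` stand-in are illustrative, not RAGFlow code.

```python
# Standalone sketch of the two patterns used in the patch; names below are
# illustrative and not taken from the RAGFlow codebase.

def pick_parser(parser_config: dict) -> str:
    # dict.get with a default: behaves like parser_config["layout_recognize"]
    # when the key exists, but falls back to True (layout-aware parsing)
    # instead of raising KeyError when the key is missing.
    if parser_config.get("layout_recognize", True):
        return "Pdf"          # layout-aware parser
    return "PlainParser"      # plain-text fallback

def sql_retrieval_stub(sql: str) -> None:
    # Deferred import: resolving the logger at call time rather than at module
    # import time keeps module loading cheap and sidesteps import cycles.
    import logging
    chat_logger = logging.getLogger("chat")  # stand-in for api.settings.chat_logger
    chat_logger.info("running SQL: %s", sql)

print(pick_parser({}))                           # -> "Pdf" (default kicks in)
print(pick_parser({"layout_recognize": False}))  # -> "PlainParser"
sql_retrieval_stub("select doc_id from ragflow limit 1")
```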