Unverified commit 23b448cf authored by KevinHuSh, committed by GitHub

fix docker compose issue (#238)

### What problem does this PR solve?

This PR fixes the Docker Compose setup: the shared infrastructure services
(es01, kibana, mysql, minio) are factored out into `docker-compose-base.yml`
and pulled into the main compose files via the top-level `include` directive,
and the ragflow container now also mounts `entrypoint.sh`. It additionally
introduces an optional `base_url` for the default LLM (`LLM_BASE_URL`), read
from `service_conf.yaml` in `api/settings.py` and stored with the tenant LLM
records, plus smaller fixes: a dedicated Docx parser with table extraction,
guards in `tokenize_table`, a longer delay before the task broker is started,
and passing the kNN `query_vector` as a plain list.

Issue link: https://github.com/infiniflow/ragflow/issues/226
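
For orientation, here is a minimal sketch of the layout this change moves to,
assembled from the hunks below (not the complete file): the top-level compose
file keeps only the `ragflow` service and pulls the shared services in through
Compose's `include` element.

```yaml
# docker-compose.yml (sketch): shared services come from docker-compose-base.yml
include:
  - path: ./docker-compose-base.yml   # defines es01, kibana, mysql, minio
    env_file: ./.env                  # STACK_VERSION, MYSQL_PASSWORD, ...

services:
  ragflow:
    depends_on:
      mysql:
        condition: service_healthy    # mysql is declared in the included file
    volumes:
      - ./service_conf.yaml:/ragflow/conf/service_conf.yaml
      - ./entrypoint.sh:/ragflow/entrypoint.sh   # newly mounted by this PR
      - ./ragflow-logs:/ragflow/logs
    networks:
      - ragflow
    restart: always
```

Note that the top-level `include` element requires a reasonably recent Docker
Compose release; with it, `docker compose up -d` run next to these files should
bring up the whole stack, with the base file contributing the named volumes and
the `ragflow` bridge network.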

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
parent b4abbe5d
@@ -65,6 +65,11 @@ def upload():
DocumentService.query,
name=file.filename,
kb_id=kb.id)
filetype = filename_type(filename)
if not filetype:
return get_data_error_result(
retmsg="This type of file has not been supported yet!")
location = filename
while MINIO.obj_exist(kb_id, location):
location += "_"
......
@@ -25,7 +25,7 @@ from api.utils.api_utils import server_error_response, validate_request
from api.utils import get_uuid, get_format_time, decrypt, download_img
from api.db import UserTenantRole, LLMType
from api.settings import RetCode, GITHUB_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, API_KEY, \
LLM_FACTORY
LLM_FACTORY, LLM_BASE_URL
from api.db.services.user_service import UserService, TenantService, UserTenantService
from api.settings import stat_logger
from api.utils.api_utils import get_json_result, cors_reponse
@@ -220,7 +220,9 @@ def user_register(user_id, user):
"llm_factory": LLM_FACTORY,
"llm_name": llm.llm_name,
"model_type": llm.model_type,
"api_key": API_KEY})
"api_key": API_KEY,
"base_url": LLM_BASE_URL
})
if not UserService.save(**user):
return
......
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import time
import uuid
@@ -21,7 +22,7 @@ from api.db.db_models import init_database_tables as init_web_db
from api.db.services import UserService
from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantLLMService, LLMBundle
from api.db.services.user_service import TenantService, UserTenantService
from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY
from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL
def init_superuser():
@@ -53,7 +54,7 @@ def init_superuser():
for llm in LLMService.query(fid=LLM_FACTORY):
tenant_llm.append(
{"tenant_id": user_info["id"], "llm_factory": LLM_FACTORY, "llm_name": llm.llm_name, "model_type": llm.model_type,
"api_key": API_KEY})
"api_key": API_KEY, "base_url": LLM_BASE_URL})
if not UserService.save(**user_info):
print("\033[93m【ERROR】\033[0mcan't init admin.")
@@ -282,11 +283,8 @@ def init_llm_factory():
pass
"""
modify service_config
drop table llm;
drop table llm_factories;
update tenant_llm set llm_factory='Tongyi-Qianwen' where llm_factory='通义千问';
update tenant_llm set llm_factory='ZHIPU-AI' where llm_factory='智谱AI';
update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One';
alter table knowledgebase modify avatar longtext;
alter table user modify avatar longtext;
......
@@ -91,6 +91,8 @@ default_llm = {
}
LLM = get_base_config("user_default_llm", {})
LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
LLM_BASE_URL = LLM.get("base_url")
if LLM_FACTORY not in default_llm:
print(
"\33[91m【ERROR】\33[0m:",
......
version: '2.2'
services:
es01:
container_name: ragflow-es-01
image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
volumes:
- esdata01:/usr/share/elasticsearch/data
ports:
- ${ES_PORT}:9200
environment:
- node.name=es01
- cluster.name=${CLUSTER_NAME}
- cluster.initial_master_nodes=es01
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
- bootstrap.memory_lock=false
- xpack.security.enabled=false
- TZ=${TIMEZONE}
mem_limit: ${MEM_LIMIT}
ulimits:
memlock:
soft: -1
hard: -1
healthcheck:
test: ["CMD-SHELL", "curl http://localhost:9200"]
interval: 10s
timeout: 10s
retries: 120
networks:
- ragflow
restart: always
kibana:
depends_on:
es01:
condition: service_healthy
image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
container_name: ragflow-kibana
volumes:
- kibanadata:/usr/share/kibana/data
ports:
- ${KIBANA_PORT}:5601
environment:
- SERVERNAME=kibana
- ELASTICSEARCH_HOSTS=http://es01:9200
- TZ=${TIMEZONE}
mem_limit: ${MEM_LIMIT}
networks:
- ragflow
mysql:
image: mysql:5.7.18
container_name: ragflow-mysql
environment:
- MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD}
- TZ=${TIMEZONE}
command:
--max_connections=1000
--character-set-server=utf8mb4
--collation-server=utf8mb4_general_ci
--default-authentication-plugin=mysql_native_password
--tls_version="TLSv1.2,TLSv1.3"
--init-file /data/application/init.sql
ports:
- ${MYSQL_PORT}:3306
volumes:
- mysql_data:/var/lib/mysql
- ./init.sql:/data/application/init.sql
networks:
- ragflow
healthcheck:
test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"]
interval: 10s
timeout: 10s
retries: 3
restart: always
minio:
image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z
container_name: ragflow-minio
command: server --console-address ":9001" /data
ports:
- 9000:9000
- 9001:9001
environment:
- MINIO_ROOT_USER=${MINIO_USER}
- MINIO_ROOT_PASSWORD=${MINIO_PASSWORD}
- TZ=${TIMEZONE}
volumes:
- minio_data:/data
networks:
- ragflow
restart: always
include:
- path: ./docker-compose-base.yml
env_file: ./.env
ragflow:
services:
ragflow:
depends_on:
mysql:
condition: service_healthy
@@ -116,18 +29,3 @@ services:
networks:
- ragflow
restart: always
volumes:
esdata01:
driver: local
kibanadata:
driver: local
mysql_data:
driver: local
minio_data:
driver: local
networks:
ragflow:
driver: bridge
version: '2.2'
services:
es01:
container_name: ragflow-es-01
image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
volumes:
- esdata01:/usr/share/elasticsearch/data
ports:
- ${ES_PORT}:9200
environment:
- node.name=es01
- cluster.name=${CLUSTER_NAME}
- cluster.initial_master_nodes=es01
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
- bootstrap.memory_lock=false
- xpack.security.enabled=false
- cluster.max_shards_per_node=4096
- TZ=${TIMEZONE}
mem_limit: ${MEM_LIMIT}
ulimits:
memlock:
soft: -1
hard: -1
healthcheck:
test: ["CMD-SHELL", "curl http://localhost:9200"]
interval: 10s
timeout: 10s
retries: 120
networks:
- ragflow
restart: always
kibana:
depends_on:
es01:
condition: service_healthy
image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
container_name: ragflow-kibana
volumes:
- kibanadata:/usr/share/kibana/data
ports:
- ${KIBANA_PORT}:5601
environment:
- SERVERNAME=kibana
- ELASTICSEARCH_HOSTS=http://es01:9200
- TZ=${TIMEZONE}
mem_limit: ${MEM_LIMIT}
networks:
- ragflow
mysql:
image: mysql:5.7.18
container_name: ragflow-mysql
environment:
- MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD}
- TZ=${TIMEZONE}
command:
--max_connections=1000
--character-set-server=utf8mb4
--collation-server=utf8mb4_general_ci
--default-authentication-plugin=mysql_native_password
--tls_version="TLSv1.2,TLSv1.3"
--init-file /data/application/init.sql
ports:
- ${MYSQL_PORT}:3306
volumes:
- mysql_data:/var/lib/mysql
- ./init.sql:/data/application/init.sql
networks:
- ragflow
healthcheck:
test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"]
interval: 10s
timeout: 10s
retries: 3
restart: always
minio:
image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z
container_name: ragflow-minio
command: server --console-address ":9001" /data
ports:
- 9000:9000
- 9001:9001
environment:
- MINIO_ROOT_USER=${MINIO_USER}
- MINIO_ROOT_PASSWORD=${MINIO_PASSWORD}
- TZ=${TIMEZONE}
volumes:
- minio_data:/data
networks:
- ragflow
restart: always
volumes:
esdata01:
driver: local
kibanadata:
driver: local
mysql_data:
driver: local
minio_data:
driver: local
networks:
ragflow:
driver: bridge
version: '2.2'
services:
es01:
container_name: ragflow-es-01
image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
volumes:
- esdata01:/usr/share/elasticsearch/data
ports:
- ${ES_PORT}:9200
environment:
- node.name=es01
- cluster.name=${CLUSTER_NAME}
- cluster.initial_master_nodes=es01
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
- bootstrap.memory_lock=false
- xpack.security.enabled=false
- TZ=${TIMEZONE}
mem_limit: ${MEM_LIMIT}
ulimits:
memlock:
soft: -1
hard: -1
healthcheck:
test: ["CMD-SHELL", "curl http://localhost:9200"]
interval: 10s
timeout: 10s
retries: 120
networks:
- ragflow
restart: always
kibana:
depends_on:
es01:
condition: service_healthy
image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
container_name: ragflow-kibana
volumes:
- kibanadata:/usr/share/kibana/data
ports:
- ${KIBANA_PORT}:5601
environment:
- SERVERNAME=kibana
- ELASTICSEARCH_HOSTS=http://es01:9200
- TZ=${TIMEZONE}
mem_limit: ${MEM_LIMIT}
networks:
- ragflow
mysql:
image: mysql:5.7.18
container_name: ragflow-mysql
environment:
- MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD}
- TZ=${TIMEZONE}
command:
--max_connections=1000
--character-set-server=utf8mb4
--collation-server=utf8mb4_general_ci
--default-authentication-plugin=mysql_native_password
--tls_version="TLSv1.2,TLSv1.3"
--init-file /data/application/init.sql
ports:
- ${MYSQL_PORT}:3306
volumes:
- mysql_data:/var/lib/mysql
- ./init.sql:/data/application/init.sql
networks:
- ragflow
healthcheck:
test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"]
interval: 10s
timeout: 10s
retries: 3
restart: always
minio:
image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z
container_name: ragflow-minio
command: server --console-address ":9001" /data
ports:
- 9000:9000
- 9001:9001
environment:
- MINIO_ROOT_USER=${MINIO_USER}
- MINIO_ROOT_PASSWORD=${MINIO_PASSWORD}
- TZ=${TIMEZONE}
volumes:
- minio_data:/data
networks:
- ragflow
restart: always
include:
- path: ./docker-compose-base.yml
env_file: ./.env
services:
ragflow:
depends_on:
mysql:
@@ -107,6 +19,7 @@ services:
- 443:443
volumes:
- ./service_conf.yaml:/ragflow/conf/service_conf.yaml
- ./entrypoint.sh:/ragflow/entrypoint.sh
- ./ragflow-logs:/ragflow/logs
- ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf
- ./nginx/proxy.conf:/etc/nginx/proxy.conf
@@ -116,18 +29,3 @@ services:
networks:
- ragflow
restart: always
volumes:
esdata01:
driver: local
kibanadata:
driver: local
mysql_data:
driver: local
minio_data:
driver: local
networks:
ragflow:
driver: bridge
@@ -23,7 +23,7 @@ function watch_broker(){
}
function task_bro(){
sleep 60;
sleep 160;
watch_broker;
}
......
@@ -18,6 +18,7 @@ es:
user_default_llm:
factory: 'Tongyi-Qianwen'
api_key: 'sk-xxxxxxxxxxxxx'
base_url: ''
oauth:
github:
client_id: xxxxxxxxxxxxxxxxxxxxxxxxx
......
@@ -10,14 +10,59 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import copy
from io import BytesIO
from docx import Document
import re
from deepdoc.parser.pdf_parser import PlainParser
from rag.app import laws
from rag.nlp import huqie, is_english, tokenize, naive_merge, tokenize_table, add_positions, tokenize_chunks
from deepdoc.parser import PdfParser, ExcelParser
from deepdoc.parser import PdfParser, ExcelParser, DocxParser
from rag.settings import cron_logger
class Docx(DocxParser):
def __init__(self):
pass
def __clean(self, line):
line = re.sub(r"\u3000", " ", line).strip()
return line
def __call__(self, filename, binary=None, from_page=0, to_page=100000):
self.doc = Document(
filename) if not binary else Document(BytesIO(binary))
pn = 0
lines = []
for p in self.doc.paragraphs:
if pn > to_page:
break
if from_page <= pn < to_page and p.text.strip():
lines.append(self.__clean(p.text))
for run in p.runs:
if 'lastRenderedPageBreak' in run._element.xml:
pn += 1
continue
if 'w:br' in run._element.xml and 'type="page"' in run._element.xml:
pn += 1
tbls = []
for tb in self.doc.tables:
html= "<table>"
for r in tb.rows:
html += "<tr>"
i = 0
while i < len(r.cells):
span = 1
c = r.cells[i]
for j in range(i+1, len(r.cells)):
if c.text == r.cells[j].text:
span += 1
i = j
i += 1
html += f"<td>{c.text}</td>" if span == 1 else f"<td colspan='{span}'>{c.text}</td>"
html += "</tr>"
html += "</table>"
tbls.append(((None, html), ""))
return [(l, "") for l in lines if l], tbls
class Pdf(PdfParser):
def __call__(self, filename, binary=None, from_page=0,
@@ -75,8 +120,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
sections = []
if re.search(r"\.docx?$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
for txt in laws.Docx()(filename, binary):
sections.append((txt, ""))
sections, tbls = Docx()(filename, binary)
res = tokenize_table(tbls, doc, eng)
callback(0.8, "Finish parsing.")
elif re.search(r"\.pdf$", filename, re.IGNORECASE):
......
@@ -223,8 +223,8 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000,
continue
if not str(row[clmns[j]]):
continue
if pd.isna(row[clmns[j]]):
continue
#if pd.isna(row[clmns[j]]):
# continue
fld = clmns_map[j][0]
d[fld] = row[clmns[j]] if clmn_tys[j] != "text" else huqie.qie(
row[clmns[j]])
......
@@ -170,3 +170,4 @@ class LocalLLM(Base):
return ans, num_tokens_from_string(ans)
except Exception as e:
return "**ERROR**: " + str(e), 0
@@ -68,6 +68,7 @@ def bullets_category(sections):
def is_english(texts):
eng = 0
if not texts: return False
for t in texts:
if re.match(r"[a-zA-Z]{2,}", t.strip()):
eng += 1
@@ -112,8 +113,8 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
d = copy.deepcopy(doc)
tokenize(d, rows, eng)
d["content_with_weight"] = rows
d["image"] = img
add_positions(d, poss)
if img: d["image"] = img
if poss: add_positions(d, poss)
res.append(d)
continue
de = "; " if eng else ""
......
@@ -46,7 +46,7 @@ class Dealer:
"k": topk,
"similarity": sim,
"num_candidates": topk * 2,
"query_vector": qv
"query_vector": list(qv)
}
def search(self, req, idxnm, emb_mdl=None):
......