From a7be5d4e8ba48f84fd6c074de06e9bdb49b7630d Mon Sep 17 00:00:00 2001
From: jie yang <55631829+yangjie407@users.noreply.github.com>
Date: Tue, 16 Apr 2024 12:29:58 +0800
Subject: [PATCH] build ragflow image from scratch (#376)

### What problem does this PR solve?

issue: #205

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
Review notes (free-form area, ignored when the patch is applied):
 * Dockerfile: the removed and added sides carry the same visible text and the
   new side ends without a trailing newline. Presumably this hunk only
   normalizes line endings -- TODO confirm against the repository, since the
   exact bytes of the old side cannot be recovered from this copy.
 * Dockerfile.scratch: builds the image on top of ubuntu:22.04 instead of the
   prebuilt ragflow-base image. The OpenMPI install step refreshes the package
   lists in the same layer and passes -y, so it neither depends on lists left
   behind by an earlier cached layer nor can stop at a confirmation prompt.
   ENV instructions use the key=value form throughout, matching the
   PYTHONPATH/HF_ENDPOINT lines at the end of the file.
 * The ld shipped in the conda env's compiler_compat directory is removed
   before the pip install step; presumably so native builds use the system
   linker -- TODO confirm.
 * deepdoc/parser/resume/step_two.py and requirements.txt switch from demjson
   to demjson3 (import, decode call site, and pinned requirement).
 * Continuation-line and context-line indentation was restored by convention
   (4 spaces); verify against the target tree before applying.

 Dockerfile                        | 40 +++++++++++------------
 Dockerfile.scratch                | 54 +++++++++++++++++++++++++++++++
 deepdoc/parser/resume/step_two.py |  4 +--
 requirements.txt                  |  2 +-
 4 files changed, 77 insertions(+), 23 deletions(-)
 create mode 100644 Dockerfile.scratch

diff --git a/Dockerfile b/Dockerfile
index c174ccb..c981509 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,20 @@
-FROM swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow-base:v1.0
-USER root
-
-WORKDIR /ragflow
-
-ADD ./web ./web
-RUN cd ./web && npm i && npm run build
-
-ADD ./api ./api
-ADD ./conf ./conf
-ADD ./deepdoc ./deepdoc
-ADD ./rag ./rag
-
-ENV PYTHONPATH=/ragflow/
-ENV HF_ENDPOINT=https://hf-mirror.com
-
-ADD docker/entrypoint.sh ./entrypoint.sh
-RUN chmod +x ./entrypoint.sh
-
-ENTRYPOINT ["./entrypoint.sh"]
+FROM swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow-base:v1.0
+USER root
+
+WORKDIR /ragflow
+
+ADD ./web ./web
+RUN cd ./web && npm i && npm run build
+
+ADD ./api ./api
+ADD ./conf ./conf
+ADD ./deepdoc ./deepdoc
+ADD ./rag ./rag
+
+ENV PYTHONPATH=/ragflow/
+ENV HF_ENDPOINT=https://hf-mirror.com
+
+ADD docker/entrypoint.sh ./entrypoint.sh
+RUN chmod +x ./entrypoint.sh
+
+ENTRYPOINT ["./entrypoint.sh"]
\ No newline at end of file
diff --git a/Dockerfile.scratch b/Dockerfile.scratch
new file mode 100644
index 0000000..3e9483e
--- /dev/null
+++ b/Dockerfile.scratch
@@ -0,0 +1,54 @@
+FROM ubuntu:22.04
+USER root
+
+WORKDIR /ragflow
+
+RUN apt-get update && apt-get install -y wget curl build-essential libopenmpi-dev
+
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
+    bash ~/miniconda.sh -b -p /root/miniconda3 && \
+    rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate base" >> ~/.bashrc
+
+ENV PATH=/root/miniconda3/bin:$PATH
+
+RUN conda create -y --name py11 python=3.11
+
+ENV CONDA_DEFAULT_ENV=py11
+ENV CONDA_PREFIX=/root/miniconda3/envs/py11
+ENV PATH=$CONDA_PREFIX/bin:$PATH
+
+RUN curl -sL https://deb.nodesource.com/setup_14.x | bash -
+RUN apt-get install -y nodejs
+
+RUN apt-get install -y nginx
+
+ADD ./web ./web
+ADD ./api ./api
+ADD ./conf ./conf
+ADD ./deepdoc ./deepdoc
+ADD ./rag ./rag
+ADD ./requirements.txt ./requirements.txt
+
+RUN apt-get update && apt-get install -y openmpi-bin openmpi-common libopenmpi-dev
+ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH
+RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
+RUN cd ./web && npm i && npm run build
+RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ -r ./requirements.txt
+
+RUN apt-get update && \
+    apt-get install -y libglib2.0-0 libgl1-mesa-glx && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ ollama
+RUN conda run -n py11 python -m nltk.downloader punkt
+RUN conda run -n py11 python -m nltk.downloader wordnet
+
+ENV PYTHONPATH=/ragflow/
+ENV HF_ENDPOINT=https://hf-mirror.com
+
+ADD docker/entrypoint.sh ./entrypoint.sh
+RUN chmod +x ./entrypoint.sh
+
+ENTRYPOINT ["./entrypoint.sh"]
diff --git a/deepdoc/parser/resume/step_two.py b/deepdoc/parser/resume/step_two.py
index 207cc79..20b0223 100644
--- a/deepdoc/parser/resume/step_two.py
+++ b/deepdoc/parser/resume/step_two.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-import re, copy, time, datetime, demjson, \
+import re, copy, time, datetime, demjson3, \
     traceback, signal
 import numpy as np
 from deepdoc.parser.resume.entities import degrees, schools, corporations
@@ -197,7 +197,7 @@ def forProj(cv):
 
 
 def json_loads(line):
-    return demjson.decode(re.sub(r": *(True|False)", r": '\1'", line))
+    return demjson3.decode(re.sub(r": *(True|False)", r": '\1'", line))
 
 
 def forWork(cv):
diff --git a/requirements.txt b/requirements.txt
index 9e32969..9cf9234 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,7 +19,7 @@ cryptography==42.0.5
 dashscope==1.14.1
 datasets==2.17.1
 datrie==0.8.2
-demjson==2.2.4
+demjson3==3.0.6
 dill==0.3.8
 distro==1.9.0
 elastic-transport==8.12.0
-- 
GitLab