From 8ffc09cb5ce3814caecf0a01b016e4a8bb52c3bf Mon Sep 17 00:00:00 2001
From: KevinHuSh <kevinhu.sh@gmail.com>
Date: Thu, 11 Apr 2024 18:25:37 +0800
Subject: [PATCH] Support Xinference (#321)

### What problem does this PR solve?

Issue link: #299

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 docs/xinference.md  | 43 +++++++++++++++++++++++++++++++++++++++++++
 rag/llm/cv_model.py |  3 ++-
 2 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 docs/xinference.md

diff --git a/docs/xinference.md b/docs/xinference.md
new file mode 100644
index 0000000..2a8feef
--- /dev/null
+++ b/docs/xinference.md
@@ -0,0 +1,43 @@
+# Xinference
+
+<div align="center" style="margin-top:20px;margin-bottom:20px;">
+<img src="https://github.com/infiniflow/ragflow/assets/12318111/2c5e86a7-807b-4d29-bd2b-f73fb1018866" width="130"/>
+</div>
+
+Xorbits Inference ([Xinference](https://github.com/xorbitsai/inference)) empowers you to unleash the full potential of cutting-edge AI models.
+
+## Install
+
+- [pip install "xinference[all]"](https://inference.readthedocs.io/en/latest/getting_started/installation.html)
+- [Docker](https://inference.readthedocs.io/en/latest/getting_started/using_docker_image.html)
+
+To start a local instance of Xinference, run the following command:
+```bash
+$ xinference-local --host 0.0.0.0 --port 9997
+```
+## Launch Xinference
+
+Decide which LLM you want to deploy ([here is a list of supported LLMs](https://inference.readthedocs.io/en/latest/models/builtin/)), say, **mistral**.
+Run the following command to launch the model, replacing ${quantization} with the quantization method you have chosen:
+```bash
+$ xinference launch -u mistral --model-name mistral-v0.1 --size-in-billions 7 --model-format pytorch --quantization ${quantization}
+```
+
+## Use Xinference in RAGFlow
+
+- Go to 'Settings > Model Providers > Models to be added > Xinference'.
+
+<div align="center" style="margin-top:20px;margin-bottom:20px;">
+<img src="https://github.com/infiniflow/ragflow/assets/12318111/bcbf4d7a-ade6-44c7-ad5f-0a92c8a73789" width="1300"/>
+</div>
+
+> Base URL: Enter the base URL where the Xinference service is accessible, e.g., http://<your-xinference-endpoint-domain>:9997
+
+- Use Xinference Models.
+
+<div align="center" style="margin-top:20px;margin-bottom:20px;">
+<img src="https://github.com/infiniflow/ragflow/assets/12318111/b01fcb6f-47c9-4777-82e0-f1e947ed615a" width="530"/>
+</div>
+<div align="center" style="margin-top:20px;margin-bottom:20px;">
+<img src="https://github.com/infiniflow/ragflow/assets/12318111/1763dcd1-044f-438d-badd-9729f5b3a144" width="530"/>
+</div>
\ No newline at end of file

diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index 4b96699..010883a 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -161,9 +161,10 @@ class OllamaCV(Base):
         except Exception as e:
             return "**ERROR**: " + str(e), 0
 
+
 class XinferenceCV(Base):
     def __init__(self, key, model_name="", lang="Chinese", base_url=""):
-        self.client = OpenAI(api_key=key, base_url=base_url)
+        self.client = OpenAI(api_key="xxx", base_url=base_url)
         self.model_name = model_name
         self.lang = lang
 
-- 
GitLab
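
Not part of the patch itself, but a minimal sketch of the pattern `XinferenceCV` relies on: querying the deployed model through Xinference's OpenAI-compatible endpoint. It assumes a local instance started as in the docs above (`xinference-local --host 0.0.0.0 --port 9997`), that the chat API is served under the `/v1` path, and that the model was launched with UID `mistral`; since Xinference does not require a real API key by default, a placeholder string is passed, which is why the PR hardcodes `api_key="xxx"`.

```python
from openai import OpenAI

# Assumption: a local Xinference instance started with
#   xinference-local --host 0.0.0.0 --port 9997
# and a model launched with `xinference launch -u mistral ...`.
# The OpenAI-compatible server accepts any placeholder key
# (the same idea as api_key="xxx" in XinferenceCV above).
client = OpenAI(api_key="xxx", base_url="http://localhost:9997/v1")

response = client.chat.completions.create(
    model="mistral",  # the model UID passed to `xinference launch -u`
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.choices[0].message.content)
```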