From c13ac0f67c244a56cf6b5e1dfca74f0ff07c0eb7 Mon Sep 17 00:00:00 2001
From: WangJun <thoughts.times@gmail.com>
Date: Tue, 12 Nov 2024 09:28:29 +0800
Subject: [PATCH 1/5] remove Open WebUI from the inference-cpp-xpu Dockerfile

---
 docker/llm/inference-cpp/Dockerfile | 14 --------------
 docker/llm/inference-cpp/README.md  | 23 ++++++++++++++++++-----
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/docker/llm/inference-cpp/Dockerfile b/docker/llm/inference-cpp/Dockerfile
index da9c24dfbbc..0645e35159d 100644
--- a/docker/llm/inference-cpp/Dockerfile
+++ b/docker/llm/inference-cpp/Dockerfile
@@ -45,19 +45,6 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     # Install opencl-related repos
     apt-get update && \
     apt-get install -y --no-install-recommends intel-opencl-icd intel-level-zero-gpu=1.3.26241.33-647~22.04 level-zero level-zero-dev --allow-downgrades && \
-    # install nodejs and npm and get webui
-    apt purge nodejs -y && \
-    apt purge libnode-dev -y && \
-    apt autoremove -y && \
-    apt clean -y && \
-    wget -qO- https://deb.nodesource.com/setup_18.x | sudo -E bash - && \
-    apt install -y nodejs && \
-    git clone https://github.com/open-webui/open-webui.git /llm/open-webui && \
-    git -C /llm/open-webui checkout e29a999dc910afad91995221cb4bb7c274f87cd6 && \
-    cp -RPp /llm/open-webui/.env.example /llm/open-webui/.env && \
-    # Build frontend
-    npm --prefix /llm/open-webui i && \
-    npm --prefix /llm/open-webui run build && \
     # Install Dependencies
     # remove blinker to avoid error
     find /usr/lib/python3/dist-packages/ -name 'blinker*' -exec rm -rf {} + && \
@@ -67,7 +54,6 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
 
 COPY ./start-llama-cpp.sh /llm/scripts/start-llama-cpp.sh
 COPY ./start-ollama.sh /llm/scripts/start-ollama.sh
-COPY ./start-open-webui.sh /llm/scripts/start-open-webui.sh
 COPY ./benchmark_llama-cpp.sh /llm/scripts/benchmark_llama-cpp.sh
 
 WORKDIR /llm/
diff --git a/docker/llm/inference-cpp/README.md b/docker/llm/inference-cpp/README.md
index cf1f9e39a59..1e396a80a1f 100644
--- a/docker/llm/inference-cpp/README.md
+++ b/docker/llm/inference-cpp/README.md
@@ -154,12 +154,25 @@ Please refer to this [documentation](https://ipex-llm.readthedocs.io/en/latest/d
 
 ### Running Open WebUI with Intel GPU
 
-Start the ollama and load the model first, then use the open-webui to chat.
-If you have difficulty accessing the huggingface repositories, you may use a mirror, e.g. add export HF_ENDPOINT=https://hf-mirror.com before running bash start.sh.
+1. Start ollama and load the model first, then use open-webui to chat.
+
+If you have difficulty accessing the Hugging Face repositories, you may use a mirror, e.g. add `export HF_ENDPOINT=https://hf-mirror.com`, then run the following script to start the open-webui docker container.
+
 ```bash
-cd /llm/scripts/
-bash start-open-webui.sh
-# INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
+export DOCKER_IMAGE=ghcr.io/open-webui/open-webui:main
+export CONTAINER_NAME=<YOUR-DOCKER-CONTAINER-NAME>
+
+docker rm -f $CONTAINER_NAME
+
+docker run -itd \
+            -v open-webui:/app/backend/data \
+            -e PORT=8080 \
+            --privileged \
+            --network=host \
+            --name $CONTAINER_NAME \
+            --restart always $DOCKER_IMAGE
 ```
 
+2. Visit <http://localhost:8080> to use open-webui. The default ollama serve address in open-webui is `http://localhost:11434`; you can change it under Connections at `http://localhost:8080/admin/settings`.
+
 For how to log-in or other guide, Please refer to this [documentation](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/open_webui_with_ollama_quickstart.html) for more details.

From 8b667ea47d4dd70f6e639440275d3da42c12ce8e Mon Sep 17 00:00:00 2001
From: WangJun <thoughts.times@gmail.com>
Date: Tue, 12 Nov 2024 12:40:20 +0800
Subject: [PATCH 2/5] update docker_cpp_xpu_quickstart.md

---
 .../DockerGuides/docker_cpp_xpu_quickstart.md | 21 +++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md b/docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md
index 24029b0f7ff..e2bcde4e22e 100644
--- a/docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md
+++ b/docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md
@@ -196,14 +196,27 @@ Please refer to this [documentation](../Quickstart/ollama_quickstart.md#4-pull-m
 
 ### Running Open WebUI with Intel GPU
 
-Start the ollama and load the model first, then use the open-webui to chat.
-If you have difficulty accessing the huggingface repositories, you may use a mirror, e.g. add `export HF_ENDPOINT=https://hf-mirror.com`before running bash start.sh.
+1. Start ollama and load the model first, then use open-webui to chat. If you have difficulty accessing the Hugging Face repositories, you may use a mirror, e.g. add `export HF_ENDPOINT=https://hf-mirror.com`, then run the following script to start the open-webui docker container.
+
 ```bash
-cd /llm/scripts/
-bash start-open-webui.sh
+export DOCKER_IMAGE=ghcr.io/open-webui/open-webui:main
+export CONTAINER_NAME=<YOUR-DOCKER-CONTAINER-NAME>
+
+docker rm -f $CONTAINER_NAME
+
+docker run -itd \
+            -v open-webui:/app/backend/data \
+            -e PORT=8080 \
+            --privileged \
+            --network=host \
+            --name $CONTAINER_NAME \
+            --restart always $DOCKER_IMAGE
 ```
 
+2. Visit <http://localhost:8080> to use open-webui. The default ollama serve address in open-webui is `http://localhost:11434`; you can change it under Connections at `http://localhost:8080/admin/settings`.
+
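+Since the container above is started in detached mode (`-itd`), one simple way to check that open-webui has started is to inspect the container log, e.g.:
+
+```bash
+# Print the open-webui startup log; uses the CONTAINER_NAME exported above
+docker logs $CONTAINER_NAME
+```
+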
 Sample output:
+
 ```bash
 INFO:     Started server process [1055]
 INFO:     Waiting for application startup.

From 81987823ce9d7bdec7cd300012cd2bfac2d2ced3 Mon Sep 17 00:00:00 2001
From: WangJun <thoughts.times@gmail.com>
Date: Tue, 12 Nov 2024 12:48:43 +0800
Subject: [PATCH 3/5] add sample output to the inference-cpp README

---
 docker/llm/inference-cpp/README.md | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/docker/llm/inference-cpp/README.md b/docker/llm/inference-cpp/README.md
index 1e396a80a1f..d5847a7aa57 100644
--- a/docker/llm/inference-cpp/README.md
+++ b/docker/llm/inference-cpp/README.md
@@ -154,9 +154,7 @@ Please refer to this [documentation](https://ipex-llm.readthedocs.io/en/latest/d
 
 ### Running Open WebUI with Intel GPU
 
-1. Start ollama and load the model first, then use open-webui to chat.
-
-If you have difficulty accessing the Hugging Face repositories, you may use a mirror, e.g. add `export HF_ENDPOINT=https://hf-mirror.com`, then run the following script to start the open-webui docker container.
+1. Start ollama and load the model first, then use open-webui to chat. If you have difficulty accessing the Hugging Face repositories, you may use a mirror, e.g. add `export HF_ENDPOINT=https://hf-mirror.com`, then run the following script to start the open-webui docker container.
 
 ```bash
 export DOCKER_IMAGE=ghcr.io/open-webui/open-webui:main
@@ -175,4 +173,17 @@ docker run -itd \
 
 2. Visit <http://localhost:8080> to use open-webui. The default ollama serve address in open-webui is `http://localhost:11434`; you can change it under Connections at `http://localhost:8080/admin/settings`.
 
-For how to log-in or other guide, Please refer to this [documentation](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/open_webui_with_ollama_quickstart.html) for more details.
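+Since the container above is started in detached mode (`-itd`), one simple way to check that open-webui has started is to inspect the container log, e.g.:
+
+```bash
+# Print the open-webui startup log; uses the CONTAINER_NAME exported above
+docker logs $CONTAINER_NAME
+```
+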
+Sample output:
+
+```bash
+INFO:     Started server process [1055]
+INFO:     Waiting for application startup.
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
+```
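+
+You can also confirm that the ollama backend is reachable at its default address (`http://localhost:11434`) before signing in, for example by listing the models it currently serves:
+
+```bash
+# Query the ollama REST API; the response lists the models that have been pulled/loaded
+curl http://localhost:11434/api/tags
+```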
+
+<a href="https://llm-assets.readthedocs.io/en/latest/_images/open_webui_signup.png" target="_blank">
+  <img src="https://llm-assets.readthedocs.io/en/latest/_images/open_webui_signup.png" width="100%" />
+</a>
+
+For how to log in and other guides, please refer to this [documentation](../Quickstart/open_webui_with_ollama_quickstart.md) for more details.

From fecf05506547401435703d9f8a0568f8d5fe9fd4 Mon Sep 17 00:00:00 2001
From: WangJun <thoughts.times@gmail.com>
Date: Tue, 12 Nov 2024 15:38:11 +0800
Subject: [PATCH 4/5] remove Open WebUI from the main README

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b3314dc3eb1..91c2f81b985 100644
--- a/README.md
+++ b/README.md
@@ -177,7 +177,7 @@ Please see the **Perplexity** result below (tested on Wikitext dataset using the
 ## `ipex-llm` Quickstart
 
 ### Docker
-- [GPU Inference in C++](docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md): running `llama.cpp`, `ollama`, `OpenWebUI`, etc., with `ipex-llm` on Intel GPU
+- [GPU Inference in C++](docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md): running `llama.cpp`, `ollama`, etc., with `ipex-llm` on Intel GPU
 - [GPU Inference in Python](docs/mddocs/DockerGuides/docker_pytorch_inference_gpu.md) : running HuggingFace `transformers`, `LangChain`, `LlamaIndex`, `ModelScope`, etc. with `ipex-llm` on Intel GPU
 - [vLLM on GPU](docs/mddocs/DockerGuides/vllm_docker_quickstart.md): running `vLLM` serving with `ipex-llm` on Intel GPU
 - [vLLM on CPU](docs/mddocs/DockerGuides/vllm_cpu_docker_quickstart.md): running `vLLM` serving with `ipex-llm` on Intel CPU  

From 23d958c884c7f2d86c0591e642a23fd354069b54 Mon Sep 17 00:00:00 2001
From: WangJun <thoughts.times@gmail.com>
Date: Tue, 12 Nov 2024 15:38:44 +0800
Subject: [PATCH 5/5] remove Open WebUI from README.zh-CN.md

---
 README.zh-CN.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.zh-CN.md b/README.zh-CN.md
index 2ec6fdf1c8f..7ff535235d8 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -177,7 +177,7 @@ See the demo of running [*Text-Generation-WebUI*](https://ipex-llm.readthedocs.i
 ## `ipex-llm` 快速入门
 
 ### Docker
-- [GPU Inference in C++](docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md): 在 Intel GPU 上使用 `ipex-llm` 运行 `llama.cpp`, `ollama`, `OpenWebUI`,等
+- [GPU Inference in C++](docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md): 在 Intel GPU 上使用 `ipex-llm` 运行 `llama.cpp`, `ollama`等
 - [GPU Inference in Python](docs/mddocs/DockerGuides/docker_pytorch_inference_gpu.md) : 在 Intel GPU 上使用 `ipex-llm` 运行 HuggingFace `transformers`, `LangChain`, `LlamaIndex`, `ModelScope`,等
 - [vLLM on GPU](docs/mddocs/DockerGuides/vllm_docker_quickstart.md): 在 Intel GPU 上使用 `ipex-llm` 运行 `vLLM` 推理服务
 - [vLLM on CPU](docs/mddocs/DockerGuides/vllm_cpu_docker_quickstart.md): 在 Intel CPU 上使用 `ipex-llm` 运行 `vLLM` 推理服务