From 84ac54b4f9564a439f07df82ee6ab7277dd6a6f1 Mon Sep 17 00:00:00 2001
From: chenzihong <522023320011@smail.nju.edu.cn>
Date: Tue, 13 Aug 2024 16:30:38 +0800
Subject: [PATCH 01/31] feat(llm): integrate fastapi

---
 hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 01bc85c4..3d75b08f 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -434,6 +434,7 @@ def apply_embedding_configuration(arg1, arg2, arg3):
         btn = gr.Button("(BETA) Init HugeGraph test data (🚧WIP)")
         btn.click(fn=init_hg_test_data, inputs=inp, outputs=out)  # pylint: disable=no-member

+    # TODO: we need to mount gradio to a FastAPI app to provide api service
     app = gr.mount_gradio_app(app, hugegraph_llm, path="/")
     # Note: set reload to False in production environment
     uvicorn.run(app, host=args.host, port=args.port)

From c67b43e50102ce6cd5288727c5b8b57c08015278 Mon Sep 17 00:00:00 2001
From: chenzihong <522023320011@smail.nju.edu.cn>
Date: Tue, 13 Aug 2024 20:30:07 +0800
Subject: [PATCH 02/31] feat: graph rag api demo

---
 hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 3d75b08f..44eedf4b 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -435,6 +435,15 @@ def apply_embedding_configuration(arg1, arg2, arg3):
         btn.click(fn=init_hg_test_data, inputs=inp, outputs=out)  # pylint: disable=no-member

     # TODO: we need to mount gradio to a FastAPI app to provide api service
+
+
+    @app.get("/graph_rag")
+    def graph_rag_api(text: str):
+        result = graph_rag(text, "false", "true", "false", "false")
+        return {"raw_answer": result[0], "vector_only_answer": result[1],
+                "graph_only_answer": result[2], "graph_vector_answer": result[3]}
+
+
     app = gr.mount_gradio_app(app, hugegraph_llm, path="/")
     # Note: set reload to False in production environment
     uvicorn.run(app, host=args.host, port=args.port)

From 57125545251099d8f4481f41e4b5471ccb29c316 Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Wed, 14 Aug 2024 09:53:38 +0800
Subject: [PATCH 03/31] feat(rag_web_demo): Synchronize hg-ai branch
 "graphspace"

---
 .../src/hugegraph_llm/demo/rag_web_demo.py | 90 ++++++++++++-------
 1 file changed, 60 insertions(+), 30 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 44eedf4b..576fffac 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -19,12 +19,15 @@
 import json
 import argparse
 import os
+from typing import Optional

 import requests
 import uvicorn
 import docx
 import gradio as gr
 from fastapi import FastAPI
+from pydantic import BaseModel
+from requests.auth import HTTPBasicAuth

 from hugegraph_llm.models.llms.init_llm import LLMs
 from hugegraph_llm.models.embeddings.init_embedding import Embeddings
@@ -51,12 +54,12 @@ def convert_bool_str(string):


 # TODO: enhance/distinguish the "graph_rag" name to avoid confusion
-def graph_rag(text: str, raw_answer: str, vector_only_answer: str,
-              graph_only_answer: str, graph_vector_answer):
-    vector_search = convert_bool_str(vector_only_answer) or convert_bool_str(graph_vector_answer)
-    graph_search = convert_bool_str(graph_only_answer) or convert_bool_str(graph_vector_answer)
+def graph_rag(text: str, raw_answer: bool, vector_only_answer: bool,
+              graph_only_answer: bool, graph_vector_answer: bool):
+    vector_search = vector_only_answer or graph_vector_answer
+    graph_search = graph_only_answer or graph_vector_answer

-    if raw_answer == "false" and not vector_search and not graph_search:
+    if raw_answer == False and not vector_search and not graph_search:
         gr.Warning("Please select at least one generate mode.")
         return "", "", "", ""
     searcher = GraphRAG()
@@ -65,10 +68,10 @@ def graph_rag(text: str, raw_answer: str, vector_only_answer: str,
     if graph_search:
         searcher.extract_keyword().match_keyword_to_id().query_graph_for_rag()
     searcher.merge_dedup_rerank().synthesize_answer(
-        raw_answer=convert_bool_str(raw_answer),
-        vector_only_answer=convert_bool_str(vector_only_answer),
-        graph_only_answer=convert_bool_str(graph_only_answer),
-        graph_vector_answer=convert_bool_str(graph_vector_answer)
+        raw_answer=raw_answer,
+        vector_only_answer=vector_only_answer,
+        graph_only_answer=graph_only_answer,
+        graph_vector_answer=graph_vector_answer
     ).run(verbose=True, query=text)

     try:
@@ -141,6 +144,14 @@ def build_kg(file, schema, example_prompt, build_mode):  # pylint: disable=too-m
         raise gr.Error(str(e))


+class RAGRequest(BaseModel):
+    query: str
+    raw_llm: Optional[bool] = None
+    vector_only: Optional[bool] = None
+    graph_only: Optional[bool] = None
+    graph_vector: Optional[bool] = None
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--host", type=str, default="0.0.0.0", help="host")
@@ -159,18 +170,19 @@
             gr.Textbox(value=str(settings.graph_port), label="port"),
             gr.Textbox(value=settings.graph_name, label="graph"),
             gr.Textbox(value=settings.graph_user, label="user"),
-            gr.Textbox(value=settings.graph_pwd, label="pwd")
+            gr.Textbox(value=settings.graph_pwd, label="pwd", type="password"),
+            gr.Textbox(value=settings.graph_space, label="graphspace (None)"),
         ]
         graph_config_button = gr.Button("apply configuration")

-        def test_api_connection(url, method="GET", ak=None, sk=None, headers=None, body=None):
+        def test_api_connection(url, method="GET", headers=None, body=None, auth=None):
             # TODO: use fastapi.request / starlette instead? (Also add a try-catch here)
             log.debug("Request URL: %s", url)
             if method.upper() == "GET":
-                response = requests.get(url, headers=headers, timeout=5)
+                response = requests.get(url, headers=headers, timeout=5, auth=auth)
             elif method.upper() == "POST":
-                response = requests.post(url, headers=headers, json=body, timeout=5)
+                response = requests.post(url, headers=headers, json=body, timeout=5, auth=auth)
             else:
                 log.error("Unsupported method: %s", method)
                 return
@@ -184,14 +196,20 @@
             gr.Error(f"Connection failed with status code: {response.status_code}")


-        def apply_graph_configuration(ip, port, name, user, pwd):
+        def apply_graph_configuration(ip, port, name, user, pwd, gs):
             settings.graph_ip = ip
             settings.graph_port = int(port)
             settings.graph_name = name
             settings.graph_user = user
             settings.graph_pwd = pwd
-            test_url = f"http://{ip}:{port}/graphs/{name}/schema"
-            test_api_connection(test_url)
+            settings.graph_space = gs
+            # Test graph connection (Auth)
+            if gs and gs.strip():
+                test_url = f"http://{ip}:{port}/graphspaces/{gs}/graphs/{name}/schema"
+            else:
+                test_url = f"http://{ip}:{port}/graphs/{name}/schema"
+            auth = HTTPBasicAuth(user, pwd)
+            test_api_connection(test_url, auth=auth)
             settings.update_env()
@@ -249,7 +267,7 @@ def apply_llm_configuration(arg1, arg2, arg3, arg4):
                 settings.openai_max_tokens = int(arg4)
                 test_url = settings.openai_api_base + "/models"
                 headers = {"Authorization": f"Bearer {arg1}"}
-                test_api_connection(test_url, headers=headers, ak=arg1)
+                test_api_connection(test_url, headers=headers)
             elif llm_option == "qianfan_wenxin":
                 settings.qianfan_api_key = arg1
                 settings.qianfan_secret_key = arg2
@@ -312,7 +330,7 @@ def apply_embedding_configuration(arg1, arg2, arg3):
                 settings.openai_embedding_model = arg3
                 test_url = settings.openai_api_base + "/models"
                 headers = {"Authorization": f"Bearer {arg1}"}
-                test_api_connection(test_url, headers=headers, ak=arg1)
+                test_api_connection(test_url, headers=headers)
             elif embedding_option == "ollama":
                 settings.ollama_host = arg1
                 settings.ollama_port = int(arg2)
@@ -406,17 +424,18 @@
                     graph_only_out = gr.Textbox(label="Graph-only Answer", show_copy_button=True)
                     graph_vector_out = gr.Textbox(label="Graph-Vector Answer", show_copy_button=True)
                 with gr.Column(scale=1):
-                    raw_radio = gr.Radio(choices=["true", "false"], value="false",
+                    raw_radio = gr.Radio(choices=[True, False], value=True,
                                          label="Basic LLM Answer")
-                    vector_only_radio = gr.Radio(choices=["true", "false"], value="true",
+                    vector_only_radio = gr.Radio(choices=[True, False], value=False,
                                                  label="Vector-only Answer")
-                    graph_only_radio = gr.Radio(choices=["true", "false"], value="false",
+                    graph_only_radio = gr.Radio(choices=[True, False], value=False,
                                                 label="Graph-only Answer")
-                    graph_vector_radio = gr.Radio(choices=["true", "false"], value="false",
+                    graph_vector_radio = gr.Radio(choices=[True, False], value=False,
                                                   label="Graph-Vector Answer")
                     btn = gr.Button("Answer Question")
-                    btn.click(fn=graph_rag, inputs=[inp, raw_radio, vector_only_radio, graph_only_radio,  # pylint: disable=no-member
-                              graph_vector_radio],
+                    btn.click(fn=graph_rag,
+                              inputs=[inp, raw_radio, vector_only_radio, graph_only_radio,  # pylint: disable=no-member
+                                      graph_vector_radio],
                               outputs=[raw_out, vector_only_out, graph_only_out, graph_vector_out])
         gr.Markdown("""## 3. Others (🚧) """)
@@ -434,16 +453,27 @@
         btn = gr.Button("(BETA) Init HugeGraph test data (🚧WIP)")
         btn.click(fn=init_hg_test_data, inputs=inp, outputs=out)  # pylint: disable=no-member

-    # TODO: we need to mount gradio to a FastAPI app to provide api service
-
-
-    @app.get("/graph_rag")
-    def graph_rag_api(text: str):
-        result = graph_rag(text, "false", "true", "false", "false")
+    @app.get("/rag/{query}")
+    def graph_rag_api(query: str):
+        result = graph_rag(query, True, True, True, True)
         return {"raw_answer": result[0], "vector_only_answer": result[1],
                 "graph_only_answer": result[2], "graph_vector_answer": result[3]}


+    @app.post("/rag")
+    def graph_rag_api(req: RAGRequest):
+        result = graph_rag(req.query, req.raw_llm, req.vector_only, req.graph_only, req.graph_vector)
+        return {key: value for key, value in zip(
+            ["raw_llm", "vector_only", "graph_only", "graph_vector"], result) if getattr(req, key)}
+
+
+    @app.get("/rag/graph/{query}")
+    def graph_rag_api(query: str):
+        result = graph_rag(query, False, False, True, False)
+        log.debug(result)
+        return {"graph_only_answer": result[2]}
+
+
     app = gr.mount_gradio_app(app, hugegraph_llm, path="/")
     # Note: set reload to False in production environment
     uvicorn.run(app, host=args.host, port=args.port)
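Note: the endpoints added in PATCH 03 can be smoke-tested from any HTTP client once the demo is running. The sketch below is illustrative only and not part of the series; it assumes the argparse defaults above (a server reachable at 127.0.0.1:8001) and an assumed sample query string.

import requests

# Hedged example: call the POST /rag endpoint added in PATCH 03.
# Fields mirror the RAGRequest model; unset flags default to None and are
# filtered out of the response by the getattr(req, key) check in the handler.
resp = requests.post(
    "http://127.0.0.1:8001/rag",
    json={"query": "Tell me about HugeGraph.", "raw_llm": True},
    timeout=30,
)
print(resp.json())  # e.g. {"raw_llm": "..."} - only truthy flags are returned

The GET variants (/rag/{query} and /rag/graph/{query}) behave the same way with fixed flag combinations.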
From 1d0cffe0e6c3b33789c3f9a51e0f4fe6acec5f63 Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Wed, 14 Aug 2024 10:15:42 +0800
Subject: [PATCH 04/31] feat(config): Add configuration for graph_space.

---
 hugegraph-llm/src/hugegraph_llm/config/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/config/config.py b/hugegraph-llm/src/hugegraph_llm/config/config.py
index 62d41d41..c1476c9d 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/config.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/config.py
@@ -67,7 +67,7 @@ class Config:
     """HugeGraph settings"""
     graph_ip: Optional[str] = "127.0.0.1"
     graph_port: Optional[int] = 8080
-    # graph_space: Optional[str] = "DEFAULT"
+    graph_space: Optional[str] = "DEFAULT"
     graph_name: Optional[str] = "hugegraph"
     graph_user: Optional[str] = "admin"
     graph_pwd: Optional[str] = "xxx"
From 5dc6772f7e511eadc9b5143195b395c7a4a7529f Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Wed, 14 Aug 2024 19:49:14 +0800
Subject: [PATCH 05/31] feat(rag_web_demo): encapsulate and test the
 graphConfig interface

---
 .../src/hugegraph_llm/demo/rag_web_demo.py | 71 +++++++++++++++----
 1 file changed, 58 insertions(+), 13 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 576fffac..584a232f 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -151,6 +151,29 @@ class RAGRequest(BaseModel):
     graph_only: Optional[bool] = None
     graph_vector: Optional[bool] = None

+class GraphConfigRequest(BaseModel):
+    ip: str
+    port: str
+    name: str
+    user: str
+    pwd: str
+    gs: str
+
+class LLMConfigRequest(BaseModel):
+    llm_type: str
+    # The common parameters shared by OpenAI, Qianfan Wenxin, and OLLAMA platforms.
+    api_key: str
+    api_base: str
+    language_model: str
+    # Openai-only properties
+    max_tokens: str = None
+    # qianfan-wenxin-only properties
+    secret_key: str = None
+    # ollama-only properties
+    host: str = None
+    port: str = None
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--host", type=str, default="0.0.0.0", help="host")
@@ -185,15 +208,20 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None):
                 response = requests.post(url, headers=headers, json=body, timeout=5, auth=auth)
             else:
                 log.error("Unsupported method: %s", method)
-                return
+                # for http api return status
+                return {"status": "Unsupported method: " + method}

             if 200 <= response.status_code < 300:
                 log.info("Connection successful. Configured finished.")
                 gr.Info("Connection successful. Configured finished.")
+                # for http api return status
+                return {"status": "Connection successful. Configured finished."}
             else:
                 log.error("Connection failed with status code: %s", response.status_code)
                 # pylint: disable=pointless-exception-statement
                 gr.Error(f"Connection failed with status code: {response.status_code}")
+                # for http api return status
+                return {"status": "Connection failed with status code: " + str(response.status_code)}
@@ -209,8 +237,10 @@ def apply_graph_configuration(ip, port, name, user, pwd, gs):
             else:
                 test_url = f"http://{ip}:{port}/graphs/{name}/schema"
             auth = HTTPBasicAuth(user, pwd)
-            test_api_connection(test_url, auth=auth)
+            # for http api return status
+            result = test_api_connection(test_url, auth=auth)
             settings.update_env()
+            return result

         graph_config_button.click(apply_graph_configuration, inputs=graph_config_input)  # pylint: disable=no-member
@@ -292,6 +322,8 @@ def apply_llm_configuration(arg1, arg2, arg3, arg4):
         )


+
+
     @gr.render(inputs=[embedding_dropdown])
     def embedding_settings(embedding_type):
         settings.embedding_type = embedding_type
@@ -453,11 +485,19 @@
         btn = gr.Button("(BETA) Init HugeGraph test data (🚧WIP)")
         btn.click(fn=init_hg_test_data, inputs=inp, outputs=out)  # pylint: disable=no-member

-    @app.get("/rag/{query}")
-    def graph_rag_api(query: str):
-        result = graph_rag(query, True, True, True, True)
-        return {"raw_answer": result[0], "vector_only_answer": result[1],
-                "graph_only_answer": result[2], "graph_vector_answer": result[3]}
+    # @app.get("/rag/{query}")
+    # def graph_rag_api(query: str):
+    #     result = graph_rag(query, True, True, True, True)
+    #     return {"raw_answer": result[0], "vector_only_answer": result[1],
+    #             "graph_only_answer": result[2], "graph_vector_answer": result[3]}
+
+    # @app.get("/rag/graph/{query}")
+    # def graph_rag_api(query: str):
+    #     result = graph_rag(query, False, False, True, False)
+    #     log.debug(result)
+    #     # return {"graph_only_answer": result[2]}
+    #     return {"raw_answer": result[0], "vector_only_answer": result[1],
+    #             "graph_only_answer": result[2], "graph_vector_answer": result[3]}


     @app.post("/rag")
@@ -466,12 +506,17 @@ def graph_rag_api(req: RAGRequest):
         return {key: value for key, value in zip(
             ["raw_llm", "vector_only", "graph_only", "graph_vector"], result) if getattr(req, key)}

-    @app.get("/rag/graph/{query}")
-    def graph_rag_api(query: str):
-        result = graph_rag(query, False, False, True, False)
-        log.debug(result)
-        return {"graph_only_answer": result[2]}
+    @app.post("/graph/config")
+    def graph_config_api(req: GraphConfigRequest):
+        result = apply_graph_configuration(req.ip, req.port, req.name, req.user, req.pwd, req.gs)
+        return json.dumps(result)
+
+    # @app.post("/llm/config")
+    # def graph_config_api(req: LLMConfigRequest):
+    #     settings.llm_type = req.llm_type
+    #     if req.llm_type == req.llm_type:
+    #         result = llm_settings.apply_llm_configuration(req.api_key, req.api_base, req.language_model, req.max_tokens)
+    #     return json.dumps(result)


     app = gr.mount_gradio_app(app, hugegraph_llm, path="/")
     # Note: set reload to False in production environment
     uvicorn.run(app, host=args.host, port=args.port)
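Note: with PATCH 05 applied, the graph configuration is reachable over HTTP as well. An illustrative request (placeholder credentials; not part of the patch) could look like this:

import requests

# Hedged example: POST /graph/config from PATCH 05. All six fields are
# required at this point in the series (defaults only arrive in PATCH 06);
# an empty "gs" falls back to the /graphs/{name}/schema test URL.
resp = requests.post(
    "http://127.0.0.1:8001/graph/config",
    json={
        "ip": "127.0.0.1",
        "port": "8080",
        "name": "hugegraph",
        "user": "admin",
        "pwd": "admin",  # placeholder password
        "gs": "",
    },
    timeout=10,
)
print(resp.json())  # a JSON-encoded {"status": ...} message at this stage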
@app.post("/llm/config") + # def graph_config_api(req: LLMConfigRequest): + # settings.llm_type = req.llm_type + # if req.llm_type == req.llm_type: + # result = llm_settings.apply_llm_configuration(req.api_key, req.api_base, req.language_model, req.max_tokens) + # return json.dumps(result) app = gr.mount_gradio_app(app, hugegraph_llm, path="/") From 5dc6772f7e511eadc9b5143195b395c7a4a7529f Mon Sep 17 00:00:00 2001 From: Hongjun Li Date: Thu, 15 Aug 2024 17:25:32 +0800 Subject: [PATCH 06/31] feat(rag_web_demo): Encapsulate llm config, split llm config function --- .../src/hugegraph_llm/demo/rag_web_demo.py | 88 ++++++++----------- 1 file changed, 36 insertions(+), 52 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py index 584a232f..df2a04af 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py @@ -152,12 +152,12 @@ class RAGRequest(BaseModel): graph_vector: Optional[bool] = None class GraphConfigRequest(BaseModel): - ip: str - port: str - name: str - user: str - pwd: str - gs: str + ip: str = "127.0.0.1" + port: str = "8080" + name: str = "hugegraph" + user: str = "xxx" + pwd: str = "xxx" + gs: str = "" class LLMConfigRequest(BaseModel): llm_type: str @@ -209,19 +209,20 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None): else: log.error("Unsupported method: %s", method) # for http api return status - return {"status": "Unsupported method: " + method} + # return {"status": "Unsupported method: " + method} if 200 <= response.status_code < 300: log.info("Connection successful. Configured finished.") gr.Info("Connection successful. Configured finished.") # for http api return status - return {"status": "Connection successful. Configured finished."} + # return {"status": "Connection successful. Configured finished."} else: log.error("Connection failed with status code: %s", response.status_code) # pylint: disable=pointless-exception-statement gr.Error(f"Connection failed with status code: {response.status_code}") # for http api return status - return {"status": "Connection failed with status code: " + str(response.status_code)} + # return {"status": "Connection failed with status code: " + str(response.status_code)} + return {"status": "hello!!!"} def apply_graph_configuration(ip, port, name, user, pwd, gs): @@ -242,7 +243,6 @@ def apply_graph_configuration(ip, port, name, user, pwd, gs): settings.update_env() return result - graph_config_button.click(apply_graph_configuration, inputs=graph_config_input) # pylint: disable=no-member gr.Markdown("2. 
Set up the LLM.") @@ -253,6 +253,30 @@ def apply_graph_configuration(ip, port, name, user, pwd, gs): ) + def apply_llm_configuration(arg1, arg2, arg3, arg4): + llm_option = settings.llm_type + + if llm_option == "openai": + settings.openai_api_key = arg1 + settings.openai_api_base = arg2 + settings.openai_language_model = arg3 + settings.openai_max_tokens = int(arg4) + test_url = settings.openai_api_base + "/models" + headers = {"Authorization": f"Bearer {arg1}"} + test_api_connection(test_url, headers=headers) + elif llm_option == "qianfan_wenxin": + settings.qianfan_api_key = arg1 + settings.qianfan_secret_key = arg2 + settings.qianfan_language_model = arg3 + # TODO: test the connection + # test_url = "https://aip.baidubce.com/oauth/2.0/token" # POST + elif llm_option == "ollama": + settings.ollama_host = arg1 + settings.ollama_port = int(arg2) + settings.ollama_language_model = arg3 + gr.Info("configured!") + settings.update_env() + @gr.render(inputs=[llm_dropdown]) def llm_settings(llm_type): settings.llm_type = llm_type @@ -276,9 +300,9 @@ def llm_settings(llm_type): with gr.Row(): llm_config_input = [ gr.Textbox(value=settings.qianfan_api_key, label="api_key", - type="password"), + type="password"), gr.Textbox(value=settings.qianfan_secret_key, label="secret_key", - type="password"), + type="password"), gr.Textbox(value=settings.qianfan_language_model, label="model_name"), gr.Textbox(value="", visible=False) ] @@ -287,33 +311,8 @@ def llm_settings(llm_type): llm_config_input = [] llm_config_button = gr.Button("apply configuration") - def apply_llm_configuration(arg1, arg2, arg3, arg4): - llm_option = settings.llm_type - - if llm_option == "openai": - settings.openai_api_key = arg1 - settings.openai_api_base = arg2 - settings.openai_language_model = arg3 - settings.openai_max_tokens = int(arg4) - test_url = settings.openai_api_base + "/models" - headers = {"Authorization": f"Bearer {arg1}"} - test_api_connection(test_url, headers=headers) - elif llm_option == "qianfan_wenxin": - settings.qianfan_api_key = arg1 - settings.qianfan_secret_key = arg2 - settings.qianfan_language_model = arg3 - # TODO: test the connection - # test_url = "https://aip.baidubce.com/oauth/2.0/token" # POST - elif llm_option == "ollama": - settings.ollama_host = arg1 - settings.ollama_port = int(arg2) - settings.ollama_language_model = arg3 - gr.Info("configured!") - settings.update_env() - llm_config_button.click(apply_llm_configuration, inputs=llm_config_input) # pylint: disable=no-member - gr.Markdown("3. 
From fa6df7b0effc19ebcee20948796672ee4876ee00 Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Thu, 15 Aug 2024 17:49:41 +0800
Subject: [PATCH 07/31] feat(rag_web_demo): Add the LLM setup HTTP interface

---
 .../src/hugegraph_llm/demo/rag_web_demo.py | 50 ++++++++++++-------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index df2a04af..bd5f1c04 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -161,7 +161,8 @@ class GraphConfigRequest(BaseModel):

 class LLMConfigRequest(BaseModel):
     llm_type: str
-    # The common parameters shared by OpenAI, Qianfan Wenxin, and OLLAMA platforms.
+    # The common parameters shared by OpenAI, Qianfan Wenxin,
+    # and OLLAMA platforms.
     api_key: str
     api_base: str
     language_model: str
@@ -209,20 +210,17 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None):
             else:
                 log.error("Unsupported method: %s", method)
-                # for http api return status
-                # return {"status": "Unsupported method: " + method}

             if 200 <= response.status_code < 300:
                 log.info("Connection successful. Configured finished.")
                 gr.Info("Connection successful. Configured finished.")
-                # for http api return status
-                # return {"status": "Connection successful. Configured finished."}
             else:
                 log.error("Connection failed with status code: %s", response.status_code)
                 # pylint: disable=pointless-exception-statement
                 gr.Error(f"Connection failed with status code: {response.status_code}")
-                # for http api return status
-                # return {"status": "Connection failed with status code: " + str(response.status_code)}
-            return {"status": "hello!!!"}
+            # for http api return status
+            return response.status_code
@@ -253,9 +251,11 @@
         )

+        # Different llm models have different parameters,
+        # so no meaningful argument names are given here
         def apply_llm_configuration(arg1, arg2, arg3, arg4):
             llm_option = settings.llm_type
-
+            status_code = 200
             if llm_option == "openai":
                 settings.openai_api_key = arg1
                 settings.openai_api_base = arg2
                 settings.openai_language_model = arg3
                 settings.openai_max_tokens = int(arg4)
                 test_url = settings.openai_api_base + "/models"
                 headers = {"Authorization": f"Bearer {arg1}"}
-                test_api_connection(test_url, headers=headers)
+                status_code = test_api_connection(test_url, headers=headers)
             elif llm_option == "qianfan_wenxin":
                 settings.qianfan_api_key = arg1
                 settings.qianfan_secret_key = arg2
                 settings.qianfan_language_model = arg3
                 # TODO: test the connection
                 # test_url = "https://aip.baidubce.com/oauth/2.0/token" # POST
             elif llm_option == "ollama":
                 settings.ollama_host = arg1
                 settings.ollama_port = int(arg2)
                 settings.ollama_language_model = arg3
             gr.Info("configured!")
             settings.update_env()
+            return status_code
@@ -506,12 +522,26 @@ def graph_rag_api(req: RAGRequest):

     @app.post("/graph/config")
     def graph_config_api(req: GraphConfigRequest):
-        result = apply_graph_configuration(req.ip, req.port, req.name, req.user, req.pwd, req.gs)
-        return json.dumps(result)
+        # Accept status code
+        status_code = apply_graph_configuration(req.ip, req.port, req.name, req.user, req.pwd, req.gs)
+        if 200 <= status_code < 300:
+            return {"message":"Connection successful. Configured finished."}
+        else:
+            return {"message":f"Connection failed with status code: {status_code}"}

-    # @app.post("/llm/config")
-    # def graph_config_api(req: LLMConfigRequest):
-    #     settings.llm_type = req.llm_type
-    #     if req.llm_type == req.llm_type:
-    #         result = llm_settings.apply_llm_configuration(req.api_key, req.api_base, req.language_model, req.max_tokens)
-    #     return json.dumps(result)
+    @app.post("/llm/config")
+    def graph_config_api(req: LLMConfigRequest):
+        settings.llm_type = req.llm_type
+
+        if req.llm_type == "openai":
+            status_code = apply_llm_configuration(req.api_key, req.api_base, req.language_model, req.max_tokens)
+        elif req.llm_type == "qianfan_wenxin":
+            status_code = apply_llm_configuration(req.api_key, req.secret_key, req.language_model, None)
+        else:
+            status_code = apply_llm_configuration(req.host, req.port, req.language_model, None)
+
+        if 200 <= status_code < 300:
+            return {"message":"Connection successful. Configured finished."}
+        else:
+            return {"message":f"Connection failed with status code: {status_code}"}


     app = gr.mount_gradio_app(app, hugegraph_llm, path="/")
     # Note: set reload to False in production environment
     uvicorn.run(app, host=args.host, port=args.port)
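Note: /llm/config added in PATCH 07 shares the LLMConfigRequest model across all three backends, so callers simply omit the fields their backend does not use. An assumed openai-style example follows (key, base URL, and model name are placeholders, not values from the series):

import requests

# Hedged example: POST /llm/config from PATCH 07. Only the "openai"
# branch consumes max_tokens; qianfan_wenxin uses secret_key instead,
# and ollama uses host/port.
resp = requests.post(
    "http://127.0.0.1:8001/llm/config",
    json={
        "llm_type": "openai",
        "api_key": "sk-placeholder",
        "api_base": "https://api.openai.com/v1",
        "language_model": "gpt-4o-mini",  # placeholder model name
        "max_tokens": "4096",
    },
    timeout=10,
)
print(resp.json())  # {"message": "Connection successful. ..."} or a failure message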
From f2d1c439dacb3bb90d2ee2da3c2bd64bbeea6e96 Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Thu, 15 Aug 2024 18:07:30 +0800
Subject: [PATCH 08/31] fix(rag_web_demo): Fix possible errors caused by
 unsupported methods in test_api_connection

---
 .../src/hugegraph_llm/demo/rag_web_demo.py | 81 ++++++++++++-------
 1 file changed, 52 insertions(+), 29 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index bd5f1c04..c292abe2 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -205,20 +205,30 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None):
             log.debug("Request URL: %s", url)
             if method.upper() == "GET":
                 response = requests.get(url, headers=headers, timeout=5, auth=auth)
+
             elif method.upper() == "POST":
                 response = requests.post(url, headers=headers, json=body, timeout=5, auth=auth)
             else:
                 log.error("Unsupported method: %s", method)

-            if 200 <= response.status_code < 300:
-                log.info("Connection successful. Configured finished.")
-                gr.Info("Connection successful. Configured finished.")
+            if response is None:
+                # Unsupported method encountered
+                return -1
+
+            # HTTP API return status
+            status_code = response.status_code
+
+            if 200 <= status_code < 300:
+                message = "Connection successful. Configured finished."
+                log.info(message)
+                gr.Info(message)
             else:
-                log.error("Connection failed with status code: %s", response.status_code)
-                # pylint: disable=pointless-exception-statement
-                gr.Error(f"Connection failed with status code: {response.status_code}")
-            # for http api return status
-            return response.status_code
+                message = f"Connection failed with status code: {status_code}"
+                log.error(message)
+                gr.Error(message)
+
+            return status_code
@@ -330,6 +340,24 @@
         )

+        def apply_embedding_configuration(embedding_option, arg1, arg2, arg3):
+            if embedding_option == "openai":
+                settings.openai_api_key = arg1
+                settings.openai_api_base = arg2
+                settings.openai_embedding_model = arg3
+                test_url = settings.openai_api_base + "/models"
+                headers = {"Authorization": f"Bearer {arg1}"}
+                test_api_connection(test_url, headers=headers)
+            elif embedding_option == "ollama":
+                settings.ollama_host = arg1
+                settings.ollama_port = int(arg2)
+                settings.ollama_embedding_model = arg3
+            elif embedding_option == "qianfan_wenxin":
+                settings.qianfan_access_token = arg1
+                settings.qianfan_embed_url = arg2
+            settings.update_env()
+            gr.Info("configured!")
+
         @gr.render(inputs=[embedding_dropdown])
         def embedding_settings(embedding_type):
             settings.embedding_type = embedding_type
@@ -364,9 +392,9 @@ def embedding_settings(embedding_type):
                 with gr.Row():
                     embedding_config_input = [
                         gr.Textbox(value=settings.qianfan_api_key, label="api_key",
-                                  type="password"),
+                                   type="password"),
                         gr.Textbox(value=settings.qianfan_secret_key, label="secret_key",
-                                  type="password"),
+                                   type="password"),
                         gr.Textbox(value=settings.qianfan_embedding_model, label="model_name"),
                     ]
             elif embedding_type == "ollama":
@@ -378,27 +406,15 @@ def embedding_settings(embedding_type):
             else:
                 embedding_config_input = []
+
             embedding_config_button = gr.Button("apply configuration")
+
+            # 在这里调用独立的 apply_embedding_configuration 函数
+            embedding_config_button.click(
+                lambda arg1, arg2, arg3: apply_embedding_configuration(settings.embedding_type, arg1, arg2, arg3),
+                inputs=embedding_config_input
+            )

-            def apply_embedding_configuration(arg1, arg2, arg3):
-                embedding_option = settings.embedding_type
-                if embedding_option == "openai":
-                    settings.openai_api_key = arg1
-                    settings.openai_api_base = arg2
-                    settings.openai_embedding_model = arg3
-                    test_url = settings.openai_api_base + "/models"
-                    headers = {"Authorization": f"Bearer {arg1}"}
-                    test_api_connection(test_url, headers=headers)
-                elif embedding_option == "ollama":
-                    settings.ollama_host = arg1
-                    settings.ollama_port = int(arg2)
-                    settings.ollama_embedding_model = arg3
-                elif embedding_option == "qianfan_wenxin":
-                    settings.qianfan_access_token = arg1
-                    settings.qianfan_embed_url = arg2
-                settings.update_env()
-
-                gr.Info("configured!")
             embedding_config_button.click(apply_embedding_configuration,  # pylint: disable=no-member
                                           inputs=embedding_config_input)
@@ -509,7 +525,10 @@ def graph_rag_api(req: RAGRequest):
     def graph_config_api(req: GraphConfigRequest):
         # Accept status code
         status_code = apply_graph_configuration(req.ip, req.port, req.name, req.user, req.pwd, req.gs)
+
+        if status_code == -1:
+            return {"message":"Unsupported HTTP method"}
+
         if 200 <= status_code < 300:
             return {"message":"Connection successful. Configured finished."}
         else:
             return {"message":f"Connection failed with status code: {status_code}"}
@@ -529,6 +548,9 @@
         else:
             status_code = apply_llm_configuration(req.host, req.port, req.language_model, None)

+        if status_code == -1:
+            return {"message":"Unsupported HTTP method"}
+
         if 200 <= status_code < 300:
             return {"message":"Connection successful. Configured finished."}
         else:
             return {"message":f"Connection failed with status code: {status_code}"}
From 3235e0723333125ed87f92fad1dfb0c45989603a Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Thu, 15 Aug 2024 18:22:03 +0800
Subject: [PATCH 09/31] feat(rag_web_demo): Encapsulate the
 embedding_config_api interface

---
 .../src/hugegraph_llm/demo/rag_web_demo.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index c292abe2..34ed4cd4 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -519,7 +519,7 @@ def graph_config_api(req: GraphConfigRequest):
             return {"message":f"Connection failed with status code: {status_code}"}

     @app.post("/llm/config")
-    def graph_config_api(req: LLMConfigRequest):
+    def llm_config_api(req: LLMConfigRequest):
         settings.llm_type = req.llm_type

         if req.llm_type == "openai":
@@ -536,6 +536,23 @@ def llm_config_api(req: LLMConfigRequest):
             return {"message":"Connection successful. Configured finished."}
         else:
             return {"message":f"Connection failed with status code: {status_code}"}
+
+    @app.post("/embedding/config")
+    def embedding_config_api(req: LLMConfigRequest):
+        if req.llm_type == "openai":
+            status_code = apply_embedding_configuration(req.llm_type, req.api_key, req.api_base, req.language_model)
+        elif req.llm_type == "qianfan_wenxin":
+            status_code = apply_embedding_configuration(req.llm_type, req.api_key, req.api_base, None)
+        else:
+            status_code = apply_embedding_configuration(req.llm_type, req.host, req.port, req.language_model)
+
+        if status_code == -1:
+            return {"message":"Unsupported HTTP method"}
+
+        if 200 <= status_code < 300:
+            return {"message":"Connection successful. Configured finished."}
+        else:
+            return {"message":f"Connection failed with status code: {status_code}"}
From e1943b8ba05f37f7ea109328fb7b9426c1dd3588 Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Thu, 15 Aug 2024 19:11:03 +0800
Subject: [PATCH 10/31] fix(rag_web_demo): Fix missing settings in
 apply_embedding_configuration and the hg-server graphspace configuration
 issue

---
 .../src/hugegraph_llm/demo/rag_web_demo.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 34ed4cd4..31d7ee49 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -195,7 +195,9 @@
             gr.Textbox(value=settings.graph_name, label="graph"),
             gr.Textbox(value=settings.graph_user, label="user"),
             gr.Textbox(value=settings.graph_pwd, label="pwd", type="password"),
-            gr.Textbox(value=settings.graph_space, label="graphspace (None)"),
+            # gr.Textbox(value=settings.graph_space, label="graphspace (None)"),
+            # wip: graph_space issue pending
+            gr.Textbox(value="", label="graphspace (None)"),
         ]
         graph_config_button = gr.Button("apply configuration")
@@ -329,10 +331,8 @@
             label="Embedding"
         )

-
-
-
-        def apply_embedding_configuration(embedding_option, arg1, arg2, arg3):
+        def apply_embedding_configuration(arg1, arg2, arg3):
+            embedding_option = settings.embedding_type
             if embedding_option == "openai":
                 settings.openai_api_key = arg1
                 settings.openai_api_base = arg2
@@ -539,12 +539,14 @@
     @app.post("/embedding/config")
     def embedding_config_api(req: LLMConfigRequest):
+        settings.embedding_type = req.llm_type
+
         if req.llm_type == "openai":
-            status_code = apply_embedding_configuration(req.llm_type, req.api_key, req.api_base, req.language_model)
+            status_code = apply_embedding_configuration(req.api_key, req.api_base, req.language_model)
         elif req.llm_type == "qianfan_wenxin":
-            status_code = apply_embedding_configuration(req.llm_type, req.api_key, req.api_base, None)
+            status_code = apply_embedding_configuration(req.api_key, req.api_base, None)
         else:
-            status_code = apply_embedding_configuration(req.llm_type, req.host, req.port, req.language_model)
+            status_code = apply_embedding_configuration(req.host, req.port, req.language_model)
From d6e6e2025a3b403afa4624d4a7013d058e58e921 Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Thu, 15 Aug 2024 19:37:47 +0800
Subject: [PATCH 11/31] refactor(rag_web_demo): Split the code inside the main
 function based on functionality

---
 .../src/hugegraph_llm/demo/rag_web_demo.py | 212 +++++++++---------
 1 file changed, 107 insertions(+), 105 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 31d7ee49..6541a390 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -174,6 +174,100 @@ class LLMConfigRequest(BaseModel):
     host: str = None
     port: str = None

+def test_api_connection(url, method="GET", headers=None, body=None, auth=None):
+    # TODO: use fastapi.request / starlette instead? (Also add a try-catch here)
+    log.debug("Request URL: %s", url)
+    if method.upper() == "GET":
+        response = requests.get(url, headers=headers, timeout=5, auth=auth)
+
+    elif method.upper() == "POST":
+        response = requests.post(url, headers=headers, json=body, timeout=5, auth=auth)
+    else:
+        log.error("Unsupported method: %s", method)
+
+    if response is None:
+        # Unsupported method encountered
+        return -1
+
+    # HTTP API return status
+    status_code = response.status_code
+
+    if 200 <= status_code < 300:
+        message = "Connection successful. Configured finished."
+        log.info(message)
+        gr.Info(message)
+    else:
+        message = f"Connection failed with status code: {status_code}"
+        log.error(message)
+        gr.Error(message)
+
+    return status_code
+
+def apply_embedding_configuration(arg1, arg2, arg3):
+    embedding_option = settings.embedding_type
+    if embedding_option == "openai":
+        settings.openai_api_key = arg1
+        settings.openai_api_base = arg2
+        settings.openai_embedding_model = arg3
+        test_url = settings.openai_api_base + "/models"
+        headers = {"Authorization": f"Bearer {arg1}"}
+        test_api_connection(test_url, headers=headers)
+    elif embedding_option == "ollama":
+        settings.ollama_host = arg1
+        settings.ollama_port = int(arg2)
+        settings.ollama_embedding_model = arg3
+    elif embedding_option == "qianfan_wenxin":
+        settings.qianfan_access_token = arg1
+        settings.qianfan_embed_url = arg2
+    settings.update_env()
+    gr.Info("configured!")
+
+def apply_graph_configuration(ip, port, name, user, pwd, gs):
+    settings.graph_ip = ip
+    settings.graph_port = int(port)
+    settings.graph_name = name
+    settings.graph_user = user
+    settings.graph_pwd = pwd
+    settings.graph_space = gs
+    # Test graph connection (Auth)
+    if gs and gs.strip():
+        test_url = f"http://{ip}:{port}/graphspaces/{gs}/graphs/{name}/schema"
+    else:
+        test_url = f"http://{ip}:{port}/graphs/{name}/schema"
+    auth = HTTPBasicAuth(user, pwd)
+    # for http api return status
+    status_code = test_api_connection(test_url, auth=auth)
+    settings.update_env()
+    return status_code
+
+# Different llm models have different parameters,
+# so no meaningful argument names are given here
+def apply_llm_configuration(arg1, arg2, arg3, arg4):
+    llm_option = settings.llm_type
+    status_code = 200
+    if llm_option == "openai":
+        settings.openai_api_key = arg1
+        settings.openai_api_base = arg2
+        settings.openai_language_model = arg3
+        settings.openai_max_tokens = int(arg4)
+        test_url = settings.openai_api_base + "/models"
+        headers = {"Authorization": f"Bearer {arg1}"}
+        status_code = test_api_connection(test_url, headers=headers)
+    elif llm_option == "qianfan_wenxin":
+        settings.qianfan_api_key = arg1
+        settings.qianfan_secret_key = arg2
+        settings.qianfan_language_model = arg3
+        # TODO: test the connection
+        # test_url = "https://aip.baidubce.com/oauth/2.0/token" # POST
+    elif llm_option == "ollama":
+        settings.ollama_host = arg1
+        settings.ollama_port = int(arg2)
+        settings.ollama_language_model = arg3
+    gr.Info("configured!")
+    settings.update_env()
+    return status_code
+

-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--host", type=str, default="0.0.0.0", help="host")
-    parser.add_argument("--port", type=int, default=8001, help="port")
-    args = parser.parse_args()
-    app = FastAPI()
-
+def create_hugegraph_llm_interface():
     with gr.Blocks() as hugegraph_llm:
         gr.Markdown(
             """# HugeGraph LLM RAG Demo
@@ -285,26 +297,7 @@
             # gr.Textbox(value=settings.graph_space, label="graphspace (None)"),
             # wip: graph_space issue pending
             gr.Textbox(value="", label="graphspace (None)"),
         ]
         graph_config_button = gr.Button("apply configuration")

-        def test_api_connection(url, method="GET", headers=None, body=None, auth=None):
-            # TODO: use fastapi.request / starlette instead? (Also add a try-catch here)
-            log.debug("Request URL: %s", url)
-            if method.upper() == "GET":
-                response = requests.get(url, headers=headers, timeout=5, auth=auth)
-
-            elif method.upper() == "POST":
-                response = requests.post(url, headers=headers, json=body, timeout=5, auth=auth)
-            else:
-                log.error("Unsupported method: %s", method)
-
-            if response is None:
-                # Unsupported method encountered
-                return -1
-
-            # HTTP API return status
-            status_code = response.status_code
-
-            if 200 <= status_code < 300:
-                message = "Connection successful. Configured finished."
-                log.info(message)
-                gr.Info(message)
-            else:
-                message = f"Connection failed with status code: {status_code}"
-                log.error(message)
-                gr.Error(message)
-
-            return status_code
-
-        def apply_graph_configuration(ip, port, name, user, pwd, gs):
-            settings.graph_ip = ip
-            settings.graph_port = int(port)
-            settings.graph_name = name
-            settings.graph_user = user
-            settings.graph_pwd = pwd
-            settings.graph_space = gs
-            # Test graph connection (Auth)
-            if gs and gs.strip():
-                test_url = f"http://{ip}:{port}/graphspaces/{gs}/graphs/{name}/schema"
-            else:
-                test_url = f"http://{ip}:{port}/graphs/{name}/schema"
-            auth = HTTPBasicAuth(user, pwd)
-            # for http api return status
-            status_code = test_api_connection(test_url, auth=auth)
-            settings.update_env()
-            return status_code
-
         graph_config_button.click(apply_graph_configuration, inputs=graph_config_input)  # pylint: disable=no-member
@@ -330,30 +323,6 @@
             value=settings.llm_type,
             label="LLM"
         )

-        # Different llm models have different parameters,
-        # so no meaningful argument names are given here
-        def apply_llm_configuration(arg1, arg2, arg3, arg4):
-            llm_option = settings.llm_type
-            status_code = 200
-            if llm_option == "openai":
-                settings.openai_api_key = arg1
-                settings.openai_api_base = arg2
-                settings.openai_language_model = arg3
-                settings.openai_max_tokens = int(arg4)
-                test_url = settings.openai_api_base + "/models"
-                headers = {"Authorization": f"Bearer {arg1}"}
-                status_code = test_api_connection(test_url, headers=headers)
-            elif llm_option == "qianfan_wenxin":
-                settings.qianfan_api_key = arg1
-                settings.qianfan_secret_key = arg2
-                settings.qianfan_language_model = arg3
-                # TODO: test the connection
-                # test_url = "https://aip.baidubce.com/oauth/2.0/token" # POST
-            elif llm_option == "ollama":
-                settings.ollama_host = arg1
-                settings.ollama_port = int(arg2)
-                settings.ollama_language_model = arg3
-            gr.Info("configured!")
-            settings.update_env()
-            return status_code
-
         @gr.render(inputs=[llm_dropdown])
         def llm_settings(llm_type):
             settings.llm_type = llm_type
@@ -368,25 +337,6 @@
             value=settings.embedding_type,
             label="Embedding"
         )

-        def apply_embedding_configuration(arg1, arg2, arg3):
-            embedding_option = settings.embedding_type
-            if embedding_option == "openai":
-                settings.openai_api_key = arg1
-                settings.openai_api_base = arg2
-                settings.openai_embedding_model = arg3
-                test_url = settings.openai_api_base + "/models"
-                headers = {"Authorization": f"Bearer {arg1}"}
-                test_api_connection(test_url, headers=headers)
-            elif embedding_option == "ollama":
-                settings.ollama_host = arg1
-                settings.ollama_port = int(arg2)
-                settings.ollama_embedding_model = arg3
-            elif embedding_option == "qianfan_wenxin":
-                settings.qianfan_access_token = arg1
-                settings.qianfan_embed_url = arg2
-            settings.update_env()
-            gr.Info("configured!")
-
         @gr.render(inputs=[embedding_dropdown])
         def embedding_settings(embedding_type):
             settings.embedding_type = embedding_type
@@ -404,9 +354,9 @@ def embedding_settings(embedding_type):
             embedding_config_button = gr.Button("apply configuration")

             # 在这里调用独立的 apply_embedding_configuration 函数
             embedding_config_button.click(
-                lambda arg1, arg2, arg3: apply_embedding_configuration(settings.embedding_type, arg1, arg2, arg3),
+                lambda arg1, arg2, arg3: apply_embedding_configuration(arg1, arg2, arg3),
                 inputs=embedding_config_input
             )

             embedding_config_button.click(apply_embedding_configuration,  # pylint: disable=no-member
                                           inputs=embedding_config_input)
@@ -486,7 +436,10 @@
         out = gr.Textbox(label="Output", show_copy_button=True)
         btn = gr.Button("(BETA) Init HugeGraph test data (🚧WIP)")
         btn.click(fn=init_hg_test_data, inputs=inp, outputs=out)  # pylint: disable=no-member
+    return hugegraph_llm
+

+def rag_web_http_api():
     @app.post("/rag")
     def graph_rag_api(req: RAGRequest):
         result = graph_rag(req.query, req.raw_llm, req.vector_only, req.graph_only, req.graph_vector)
@@ -548,10 +551,17 @@
         if 200 <= status_code < 300:
             return {"message":"Connection successful. Configured finished."}
         else:
             return {"message":f"Connection failed with status code: {status_code}"}


+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", type=str, default="0.0.0.0", help="host")
+    parser.add_argument("--port", type=int, default=8001, help="port")
+    args = parser.parse_args()
+    app = FastAPI()
+
+    hugegraph_llm = create_hugegraph_llm_interface()
+
+    rag_web_http_api()
+
     app = gr.mount_gradio_app(app, hugegraph_llm, path="/")
     # Note: set reload to False in production environment
     uvicorn.run(app, host=args.host, port=args.port)
From 4ae6b857fd1342d36a995119b3c6f39690dcb883 Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Thu, 15 Aug 2024 19:47:21 +0800
Subject: [PATCH 12/31] fix(rag_web_demo): Change the RAGRequest parameter
 defaults

---
 hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 6541a390..c01e75f6 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -146,10 +146,10 @@ def build_kg(file, schema, example_prompt, build_mode):  # pylint: disable=too-m

 class RAGRequest(BaseModel):
     query: str
-    raw_llm: Optional[bool] = None
-    vector_only: Optional[bool] = None
-    graph_only: Optional[bool] = None
-    graph_vector: Optional[bool] = None
+    raw_llm: Optional[bool] = True
+    vector_only: Optional[bool] = False
+    graph_only: Optional[bool] = False
+    graph_vector: Optional[bool] = False

 class GraphConfigRequest(BaseModel):
     ip: str = "127.0.0.1"

From 5be4cf22c588d87905c745a1acd5ea94704e0af8 Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Thu, 15 Aug 2024 19:52:45 +0800
Subject: [PATCH 13/31] fix(config): change the graph_space default to None

---
 hugegraph-llm/src/hugegraph_llm/config/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/config/config.py b/hugegraph-llm/src/hugegraph_llm/config/config.py
index c1476c9d..3659cc1d 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/config.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/config.py
@@ -67,7 +67,7 @@ class Config:
     """HugeGraph settings"""
     graph_ip: Optional[str] = "127.0.0.1"
     graph_port: Optional[int] = 8080
-    graph_space: Optional[str] = "DEFAULT"
+    graph_space: Optional[str] = None
    graph_name: Optional[str] = "hugegraph"
     graph_user: Optional[str] = "admin"
     graph_pwd: Optional[str] = "xxx"
From e33762ac529b73cdaa37dea296a347a495de4aab Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Thu, 15 Aug 2024 20:00:08 +0800
Subject: [PATCH 14/31] fix(rag_web_demo): Change code comments

---
 hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index c01e75f6..cd7b8892 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -369,7 +369,7 @@ def embedding_settings(embedding_type):
             embedding_config_button = gr.Button("apply configuration")

-            # 在这里调用独立的 apply_embedding_configuration 函数
+            # Call the separate apply_embedding_configuration function here
             embedding_config_button.click(
                 lambda arg1, arg2, arg3: apply_embedding_configuration(arg1, arg2, arg3),
                 inputs=embedding_config_input

From e8a22d3241728317fb6d6536c156d1518f775f70 Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Fri, 16 Aug 2024 17:05:38 +0800
Subject: [PATCH 15/31] style(rag_web_demo): Add a space after the colon

---
 .../src/hugegraph_llm/demo/rag_web_demo.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index cd7b8892..4e4cc9d3 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -502,12 +502,12 @@ def graph_config_api(req: GraphConfigRequest):
         status_code = apply_graph_configuration(req.ip, req.port, req.name, req.user, req.pwd, req.gs)

         if status_code == -1:
-            return {"message":"Unsupported HTTP method"}
+            return {"message": "Unsupported HTTP method"}

         if 200 <= status_code < 300:
-            return {"message":"Connection successful. Configured finished."}
+            return {"message": "Connection successful. Configured finished."}
         else:
-            return {"message":f"Connection failed with status code: {status_code}"}
+            return {"message": f"Connection failed with status code: {status_code}"}

     @app.post("/llm/config")
     def llm_config_api(req: LLMConfigRequest):
@@ -521,12 +521,12 @@ def llm_config_api(req: LLMConfigRequest):
             status_code = apply_llm_configuration(req.host, req.port, req.language_model, None)

         if status_code == -1:
-            return {"message":"Unsupported HTTP method"}
+            return {"message": "Unsupported HTTP method"}

         if 200 <= status_code < 300:
-            return {"message":"Connection successful. Configured finished."}
+            return {"message": "Connection successful. Configured finished."}
         else:
-            return {"message":f"Connection failed with status code: {status_code}"}
+            return {"message": f"Connection failed with status code: {status_code}"}

     @app.post("/embedding/config")
     def embedding_config_api(req: LLMConfigRequest):
@@ -540,10 +540,10 @@ def embedding_config_api(req: LLMConfigRequest):
             status_code = apply_embedding_configuration(req.host, req.port, req.language_model)

         if status_code == -1:
-            return {"message":"Unsupported HTTP method"}
+            return {"message": "Unsupported HTTP method"}

         if 200 <= status_code < 300:
-            return {"message":"Connection successful. Configured finished."}
+            return {"message": "Connection successful. Configured finished."}
         else:
             return {"message":f"Connection failed with status code: {status_code}"}
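Note: by PATCH 15 the three config endpoints share one request/response shape. An illustrative /embedding/config call for the ollama branch follows (all values are placeholders; api_key/api_base must still be present because LLMConfigRequest declares them as required). Be aware that at this exact point in the series apply_embedding_configuration still lacks a return value, which is precisely what PATCH 16 fixes next.

import requests

# Hedged example: POST /embedding/config as of PATCH 15. For the ollama
# branch the handler forwards host/port/language_model on to
# apply_embedding_configuration.
resp = requests.post(
    "http://127.0.0.1:8001/embedding/config",
    json={
        "llm_type": "ollama",
        "api_key": "",               # unused by the ollama branch
        "api_base": "",              # unused by the ollama branch
        "language_model": "bge-m3",  # placeholder embedding model name
        "host": "127.0.0.1",
        "port": "11434",
    },
    timeout=10,
)
print(resp.json())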
From 7631a5f6b9f1204b799bf73f72bccdc405dbcbcd Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Fri, 16 Aug 2024 17:11:52 +0800
Subject: [PATCH 16/31] fix(rag_web_demo): Fixed missing return value of
 apply_embedding_configuration function

---
 .../src/hugegraph_llm/demo/rag_web_demo.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 4e4cc9d3..6aa7f6d3 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -204,6 +204,9 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None):

 def apply_embedding_configuration(arg1, arg2, arg3):
+    # Because of ollama, the qianfan_wenxin model is missing the test connect procedure,
+    # so it defaults to 200 so that there is no return value problem
+    status_code = 200
     embedding_option = settings.embedding_type
     if embedding_option == "openai":
         settings.openai_api_key = arg1
         settings.openai_api_base = arg2
         settings.openai_embedding_model = arg3
         test_url = settings.openai_api_base + "/models"
         headers = {"Authorization": f"Bearer {arg1}"}
-        test_api_connection(test_url, headers=headers)
+        status_code = test_api_connection(test_url, headers=headers)
     elif embedding_option == "ollama":
         settings.ollama_host = arg1
         settings.ollama_port = int(arg2)
         settings.ollama_embedding_model = arg3
     elif embedding_option == "qianfan_wenxin":
         settings.qianfan_access_token = arg1
         settings.qianfan_embed_url = arg2
     settings.update_env()
     gr.Info("configured!")
+    return status_code

 def apply_graph_configuration(ip, port, name, user, pwd, gs):
@@ -244,8 +250,8 @@ def apply_graph_configuration(ip, port, name, user, pwd, gs):
 # Different llm models have different parameters,
 # so no meaningful argument names are given here
 def apply_llm_configuration(arg1, arg2, arg3, arg4):
     llm_option = settings.llm_type
+    # Because of ollama, the qianfan_wenxin model is missing the test connect procedure,
+    # so it defaults to 200 so that there is no return value problem
     status_code = 200
     if llm_option == "openai":
         settings.openai_api_key = arg1
@@ -548,6 +554,6 @@ def embedding_config_api(req: LLMConfigRequest):
         if 200 <= status_code < 300:
             return {"message": "Connection successful. Configured finished."}
         else:
-            return {"message":f"Connection failed with status code: {status_code}"}
+            return {"message": f"Connection failed with status code: {status_code}"}

From ddc7931a27d17095ba8e5e28827ac6e699bb36ed Mon Sep 17 00:00:00 2001
From: imbajin
Date: Fri, 16 Aug 2024 20:00:49 +0800
Subject: [PATCH 17/31] tiny fix

---
 .gitignore | 1 +
 .../src/hugegraph_llm/demo/rag_web_demo.py | 49 ++++++++++---------
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/.gitignore b/.gitignore
index de241917..786b5e10 100644
--- a/.gitignore
+++ b/.gitignore
@@ -164,3 +164,4 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/
+*.DS_Store

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 6aa7f6d3..1ed7530b 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -59,7 +59,7 @@ def graph_rag(text: str, raw_answer: bool, vector_only_answer: bool,
     vector_search = vector_only_answer or graph_vector_answer
     graph_search = graph_only_answer or graph_vector_answer

-    if raw_answer == False and not vector_search and not graph_search:
+    if raw_answer is False and not vector_search and not graph_search:
         gr.Warning("Please select at least one generate mode.")
         return "", "", "", ""
     searcher = GraphRAG()
@@ -151,13 +151,15 @@ class RAGRequest(BaseModel):
     graph_only: Optional[bool] = False
     graph_vector: Optional[bool] = False

+
 class GraphConfigRequest(BaseModel):
     ip: str = "127.0.0.1"
     port: str = "8080"
     name: str = "hugegraph"
     user: str = "xxx"
     pwd: str = "xxx"
-    gs: str = ""
+    gs: str = None
+

 class LLMConfigRequest(BaseModel):
     llm_type: str
@@ -176,12 +178,12 @@ class LLMConfigRequest(BaseModel):
     host: str = None
     port: str = None

+
 def test_api_connection(url, method="GET", headers=None, body=None, auth=None):
     # TODO: use fastapi.request / starlette instead? (Also add a try-catch here)
+    response = None
     log.debug("Request URL: %s", url)
     if method.upper() == "GET":
         response = requests.get(url, headers=headers, timeout=5, auth=auth)
-
     elif method.upper() == "POST":
         response = requests.post(url, headers=headers, json=body, timeout=5, auth=auth)
     else:
         log.error("Unsupported method: %s", method)
@@ -194,9 +196,8 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None):

     # HTTP API return status
     status_code = response.status_code
-
     if 200 <= status_code < 300:
         message = "Connection successful. Configured finished."
         log.info(message)
         gr.Info(message)
     else:
         message = f"Connection failed with status code: {status_code}"
         log.error(message)
         gr.Error(message)
-
     return status_code
@@ -223,9 +225,10 @@ def apply_embedding_configuration(arg1, arg2, arg3):
     elif embedding_option == "qianfan_wenxin":
         settings.qianfan_access_token = arg1
         settings.qianfan_embed_url = arg2
     settings.update_env()
-    gr.Info("configured!")
+    gr.Info("Configured!")
     return status_code
+
@@ -271,10 +274,11 @@ def apply_llm_configuration(arg1, arg2, arg3, arg4):
     elif llm_option == "ollama":
         settings.ollama_host = arg1
         settings.ollama_port = int(arg2)
         settings.ollama_language_model = arg3
-    gr.Info("configured!")
+    gr.Info("Configured!")
     settings.update_env()
     return status_code

+
 def create_hugegraph_llm_interface():
     with gr.Blocks() as hugegraph_llm:
@@ -294,7 +298,7 @@
             gr.Textbox(value="", label="graphspace (None)"),
         ]
         graph_config_button = gr.Button("apply configuration")
-        
+
         graph_config_button.click(apply_graph_configuration, inputs=graph_config_input)  # pylint: disable=no-member
@@ -327,9 +331,9 @@ def llm_settings(llm_type):
                 with gr.Row():
                     llm_config_input = [
                         gr.Textbox(value=settings.qianfan_api_key, label="api_key",
-                                  type="password"),
+                                   type="password"),
                         gr.Textbox(value=settings.qianfan_secret_key, label="secret_key",
-                                  type="password"),
+                                   type="password"),
                         gr.Textbox(value=settings.qianfan_language_model, label="model_name"),
                         gr.Textbox(value="", visible=False)
                     ]
@@ -361,9 +365,9 @@ def embedding_settings(embedding_type):
                 with gr.Row():
                     embedding_config_input = [
                         gr.Textbox(value=settings.qianfan_api_key, label="api_key",
-                                  type="password"),
+                                   type="password"),
                         gr.Textbox(value=settings.qianfan_secret_key, label="secret_key",
-                                  type="password"),
+                                   type="password"),
                         gr.Textbox(value=settings.qianfan_embedding_model, label="model_name"),
                     ]
@@ -377,16 +381,14 @@ def embedding_settings(embedding_type):
             embedding_config_button = gr.Button("apply configuration")
-            
+
             # Call the separate apply_embedding_configuration function here
             embedding_config_button.click(
                 lambda arg1, arg2, arg3: apply_embedding_configuration(arg1, arg2, arg3),
                 inputs=embedding_config_input
             )

             embedding_config_button.click(apply_embedding_configuration,  # pylint: disable=no-member
                                           inputs=embedding_config_input)
-

         gr.Markdown(
             """## 1. Build vector/graph RAG (💡)
             - Document: Input document file which should be TXT or DOCX.
@@ -507,26 +509,26 @@ def graph_rag_api(req: RAGRequest):
     @app.post("/graph/config")
     def graph_config_api(req: GraphConfigRequest):
         # Accept status code
         status_code = apply_graph_configuration(req.ip, req.port, req.name, req.user, req.pwd, req.gs)
-        
+
         if status_code == -1:
             return {"message": "Unsupported HTTP method"}
-        
+
         if 200 <= status_code < 300:
             return {"message": "Connection successful. Configured finished."}
         else:
             return {"message": f"Connection failed with status code: {status_code}"}
-    
+
     @app.post("/llm/config")
     def llm_config_api(req: LLMConfigRequest):
         settings.llm_type = req.llm_type
-        
+
         if req.llm_type == "openai":
             status_code = apply_llm_configuration(req.api_key, req.api_base, req.language_model, req.max_tokens)
         elif req.llm_type == "qianfan_wenxin":
             status_code = apply_llm_configuration(req.api_key, req.secret_key, req.language_model, None)
         else:
             status_code = apply_llm_configuration(req.host, req.port, req.language_model, None)
-        
+
         if status_code == -1:
             return {"message": "Unsupported HTTP method"}

         if 200 <= status_code < 300:
             return {"message": "Connection successful. Configured finished."}
         else:
             return {"message": f"Connection failed with status code: {status_code}"}
-    
+
     @app.post("/embedding/config")
     def embedding_config_api(req: LLMConfigRequest):
         settings.embedding_type = req.llm_type
@@ -540,7 +542,7 @@ def embedding_config_api(req: LLMConfigRequest):
             status_code = apply_embedding_configuration(req.api_key, req.api_base, None)
         else:
             status_code = apply_embedding_configuration(req.host, req.port, req.language_model)
-        
+
         if status_code == -1:
             return {"message": "Unsupported HTTP method"}

@@ -562,7 +564,7 @@
     app = FastAPI()

     hugegraph_llm = create_hugegraph_llm_interface()
-    
+
     rag_web_http_api()

     app = gr.mount_gradio_app(app, hugegraph_llm, path="/")
     # Note: set reload to False in production environment
     uvicorn.run(app, host=args.host, port=args.port)
Configured finished."}
         else:
             return {"message": f"Connection failed with status code: {status_code}"}
-
+
     @app.post("/llm/config")
     def llm_config_api(req: LLMConfigRequest):
         settings.llm_type = req.llm_type
-
+
         if req.llm_type == "openai":
             status_code = apply_llm_configuration(req.api_key, req.api_base, req.language_model, req.max_tokens)
         elif req.llm_type == "qianfan_wenxin":
             status_code = apply_llm_configuration(req.api_key, req.secret_key, req.language_model, None)
         else:
             status_code = apply_llm_configuration(req.host, req.port, req.language_model, None)
-
+
         if status_code == -1:
             return {"message": "Unsupported HTTP method"}
 
@@ -533,7 +536,7 @@ def llm_config_api(req: LLMConfigRequest):
             return {"message": "Connection successful. Configured finished."}
         else:
             return {"message": f"Connection failed with status code: {status_code}"}
-
+
     @app.post("/embedding/config")
     def embedding_config_api(req: LLMConfigRequest):
         settings.embedding_type = req.llm_type
@@ -544,7 +547,7 @@ def embedding_config_api(req: LLMConfigRequest):
             status_code = apply_embedding_configuration(req.api_key, req.api_base, None)
         else:
             status_code = apply_embedding_configuration(req.host, req.port, req.language_model)
-
+
         if status_code == -1:
             return {"message": "Unsupported HTTP method"}
 
@@ -562,7 +565,7 @@ def embedding_config_api(req: LLMConfigRequest):
 
     app = FastAPI()
     hugegraph_llm = create_hugegraph_llm_interface()
-
+
     rag_web_http_api()
 
     app = gr.mount_gradio_app(app, hugegraph_llm, path="/")

From 967e950b63c08c6ad47249f97479a4f90f763278 Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Fri, 16 Aug 2024 23:39:43 +0800
Subject: [PATCH 18/31] fix(rag_web_demo): fix gr.Error rendering in
 test_api_connection and encapsulate the HTTP API in a standalone package

---
 .../src/hugegraph_llm/api/__init__ .py        |  16 +++
 .../src/hugegraph_llm/api/rag_api.py          | 114 ++++++++++++++++
 .../src/hugegraph_llm/demo/rag_web_demo.py    | 126 +++---------------
 3 files changed, 147 insertions(+), 109 deletions(-)
 create mode 100644 hugegraph-llm/src/hugegraph_llm/api/__init__ .py
 create mode 100644 hugegraph-llm/src/hugegraph_llm/api/rag_api.py

diff --git a/hugegraph-llm/src/hugegraph_llm/api/__init__ .py b/hugegraph-llm/src/hugegraph_llm/api/__init__ .py
new file mode 100644
index 00000000..13a83393
--- /dev/null
+++ b/hugegraph-llm/src/hugegraph_llm/api/__init__ .py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py
new file mode 100644
index 00000000..cd81824b
--- /dev/null
+++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py
@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from fastapi import FastAPI +from pydantic import BaseModel +from typing import Optional +from hugegraph_llm.config import settings + + +class RAGRequest(BaseModel): + query: str + raw_llm: Optional[bool] = True + vector_only: Optional[bool] = False + graph_only: Optional[bool] = False + graph_vector: Optional[bool] = False + + +class GraphConfigRequest(BaseModel): + ip: str = "127.0.0.1" + port: str = "8080" + name: str = "hugegraph" + user: str = "xxx" + pwd: str = "xxx" + gs: str = None + + +class LLMConfigRequest(BaseModel): + llm_type: str + # The common parameters shared by OpenAI, Qianfan Wenxin, + # and OLLAMA platforms. + api_key: str + api_base: str + language_model: str + # Openai-only properties + max_tokens: str = None + # qianfan-wenxin-only properties + secret_key: str = None + # ollama-only properties + host: str = None + port: str = None + + +def rag_web_http_api(app: FastAPI, graph_rag_func, apply_graph_configuration_func, + apply_llm_configuration_func, apply_embedding_configuration_func): + @app.post("/rag") + def graph_rag_api(req: RAGRequest): + result = graph_rag_func(req.query, req.raw_llm, req.vector_only, req.graph_only, req.graph_vector) + return {key: value for key, value in zip( + ["raw_llm", "vector_only", "graph_only", "graph_vector"], result) if getattr(req, key)} + + @app.post("/graph/config") + def graph_config_api(req: GraphConfigRequest): + # Accept status code + status_code = apply_graph_configuration_func(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") + + if status_code == -1: + return {"message": "Unsupported HTTP method"} + + if 200 <= status_code < 300: + return {"message": "Connection successful. Configured finished."} + else: + return {"message": f"Connection failed with status code: {status_code}"} + + @app.post("/llm/config") + def llm_config_api(req: LLMConfigRequest): + settings.llm_type = req.llm_type + + if req.llm_type == "openai": + status_code = apply_llm_configuration_func(req.api_key, req.api_base, req.language_model, req.max_tokens, origin_call="http") + elif req.llm_type == "qianfan_wenxin": + status_code = apply_llm_configuration_func(req.api_key, req.secret_key, req.language_model, None, origin_call="http") + else: + status_code = apply_llm_configuration_func(req.host, req.port, req.language_model, None, origin_call="http") + + if status_code == -1: + return {"message": "Unsupported HTTP method"} + + if 200 <= status_code < 300: + return {"message": "Connection successful. 
Configured finished."} + else: + return {"message": f"Connection failed with status code: {status_code}"} + + @app.post("/embedding/config") + def embedding_config_api(req: LLMConfigRequest): + settings.embedding_type = req.llm_type + + if req.llm_type == "openai": + status_code = apply_embedding_configuration_func(req.api_key, req.api_base, req.language_model, origin_call="http") + elif req.llm_type == "qianfan_wenxin": + status_code = apply_embedding_configuration_func(req.api_key, req.api_base, None, origin_call="http") + else: + status_code = apply_embedding_configuration_func(req.host, req.port, req.language_model, origin_call="http") + + if status_code == -1: + return {"message": "Unsupported HTTP method"} + + if 200 <= status_code < 300: + return {"message": "Connection successful. Configured finished."} + else: + return {"message": f"Connection failed with status code: {status_code}"} \ No newline at end of file diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py index 1ed7530b..b93a3169 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py @@ -19,14 +19,12 @@ import json import argparse import os -from typing import Optional import requests import uvicorn import docx import gradio as gr from fastapi import FastAPI -from pydantic import BaseModel from requests.auth import HTTPBasicAuth from hugegraph_llm.models.llms.init_llm import LLMs @@ -43,6 +41,7 @@ from hugegraph_llm.utils.log import log from hugegraph_llm.utils.hugegraph_utils import get_hg_client from hugegraph_llm.utils.vector_index_utils import clean_vector_index +from hugegraph_llm.api.rag_api import rag_web_http_api def convert_bool_str(string): @@ -143,41 +142,8 @@ def build_kg(file, schema, example_prompt, build_mode): # pylint: disable=too-m log.error(e) raise gr.Error(str(e)) - -class RAGRequest(BaseModel): - query: str - raw_llm: Optional[bool] = True - vector_only: Optional[bool] = False - graph_only: Optional[bool] = False - graph_vector: Optional[bool] = False - - -class GraphConfigRequest(BaseModel): - ip: str = "127.0.0.1" - port: str = "8080" - name: str = "hugegraph" - user: str = "xxx" - pwd: str = "xxx" - gs: str = None - - -class LLMConfigRequest(BaseModel): - llm_type: str - # The common parameters shared by OpenAI, Qianfan Wenxin, - # and OLLAMA platforms. - api_key: str - api_base: str - language_model: str - # Openai-only properties - max_tokens: str = None - # qianfan-wenxin-only properties - secret_key: str = None - # ollama-only properties - host: str = None - port: str = None - - -def test_api_connection(url, method="GET", headers=None, body=None, auth=None): +# todo: origin_call was created to stave off problems with gr.error that needed to be fixed +def test_api_connection(url, method="GET", headers=None, body=None, auth=None, origin_call=None): # TODO: use fastapi.request / starlette instead? (Also add a try-catch here) response = None log.debug("Request URL: %s", url) @@ -192,20 +158,21 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None): # Unsupported method encountered return -1 - # HTTP API return status - status_code = response.status_code - if 200 <= status_code < 300: + if 200 <= response.status_code < 300: message = "Connection successful. Configured finished." 
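The origin_call parameter threaded through test_api_connection and the apply_* helpers in this patch acts as a caller tag: None means the call came from the Gradio UI and a failure should be rendered via gr.Error, while the REST layer passes origin_call="http" and only wants the status code back. A stripped-down sketch of the pattern (illustrative only; report_status is a hypothetical name, not the patched code itself):

    def report_status(status_code: int, origin_call=None) -> int:
        # Sketch of the dual-reporting idea, not the actual function.
        # UI path: raise so Gradio renders the failure as a visible error.
        if not 200 <= status_code < 300 and origin_call is None:
            raise RuntimeError(f"Connection failed with status code: {status_code}")
        # HTTP path: hand the code back so the FastAPI handler builds the JSON reply.
        return status_code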
log.info(message) gr.Info(message) else: - message = f"Connection failed with status code: {status_code}" + message = f"Connection failed with status code: {response.status_code}" log.error(message) - gr.Error(message) - return status_code + # todo: How to remove raise and gr will render error + # pylint: disable=pointless-exception-statement + if origin_call == None: + raise gr.Error(f"Connection failed with status code: {response.status_code}") + return response.status_code -def apply_embedding_configuration(arg1, arg2, arg3): +def apply_embedding_configuration(arg1, arg2, arg3, origin_call=None): # Because of ollama, the qianfan_wenxin model is missing the test connect procedure, # so it defaults to 200 so that there is no return value problem status_code = 200 @@ -216,7 +183,7 @@ def apply_embedding_configuration(arg1, arg2, arg3): settings.openai_embedding_model = arg3 test_url = settings.openai_api_base + "/models" headers = {"Authorization": f"Bearer {arg1}"} - status_code = test_api_connection(test_url, headers=headers) + status_code = test_api_connection(test_url, headers=headers, origin_call=origin_call) elif embedding_option == "ollama": settings.ollama_host = arg1 settings.ollama_port = int(arg2) @@ -229,7 +196,7 @@ def apply_embedding_configuration(arg1, arg2, arg3): return status_code -def apply_graph_configuration(ip, port, name, user, pwd, gs): +def apply_graph_configuration(ip, port, name, user, pwd, gs, origin_call=None): settings.graph_ip = ip settings.graph_port = int(port) settings.graph_name = name @@ -243,14 +210,14 @@ def apply_graph_configuration(ip, port, name, user, pwd, gs): test_url = f"http://{ip}:{port}/graphs/{name}/schema" auth = HTTPBasicAuth(user, pwd) # for http api return status - status_code = test_api_connection(test_url, auth=auth) + status_code = test_api_connection(test_url, auth=auth, origin_call=origin_call) settings.update_env() return status_code # Different llm models have different parameters, # so no meaningful argument names are given here -def apply_llm_configuration(arg1, arg2, arg3, arg4): +def apply_llm_configuration(arg1, arg2, arg3, arg4, origin_call=None): llm_option = settings.llm_type # Because of ollama, the qianfan_wenxin model is missing the test connect procedure, # so it defaults to 200 so that there is no return value problem @@ -262,7 +229,7 @@ def apply_llm_configuration(arg1, arg2, arg3, arg4): settings.openai_max_tokens = int(arg4) test_url = settings.openai_api_base + "/models" headers = {"Authorization": f"Bearer {arg1}"} - status_code = test_api_connection(test_url, headers=headers) + status_code = test_api_connection(test_url, headers=headers, origin_call=origin_call) elif llm_option == "qianfan_wenxin": settings.qianfan_api_key = arg1 settings.qianfan_secret_key = arg2 @@ -498,65 +465,6 @@ def embedding_settings(embedding_type): return hugegraph_llm -def rag_web_http_api(): - @app.post("/rag") - def graph_rag_api(req: RAGRequest): - result = graph_rag(req.query, req.raw_llm, req.vector_only, req.graph_only, req.graph_vector) - return {key: value for key, value in zip( - ["raw_llm", "vector_only", "graph_only", "graph_vector"], result) if getattr(req, key)} - - @app.post("/graph/config") - def graph_config_api(req: GraphConfigRequest): - # Accept status code - status_code = apply_graph_configuration(req.ip, req.port, req.name, req.user, req.pwd, req.gs) - - if status_code == -1: - return {"message": "Unsupported HTTP method"} - - if 200 <= status_code < 300: - return {"message": "Connection successful. 
Configured finished."} - else: - return {"message": f"Connection failed with status code: {status_code}"} - - @app.post("/llm/config") - def llm_config_api(req: LLMConfigRequest): - settings.llm_type = req.llm_type - - if req.llm_type == "openai": - status_code = apply_llm_configuration(req.api_key, req.api_base, req.language_model, req.max_tokens) - elif req.llm_type == "qianfan_wenxin": - status_code = apply_llm_configuration(req.api_key, req.secret_key, req.language_model, None) - else: - status_code = apply_llm_configuration(req.host, req.port, req.language_model, None) - - if status_code == -1: - return {"message": "Unsupported HTTP method"} - - if 200 <= status_code < 300: - return {"message": "Connection successful. Configured finished."} - else: - return {"message": f"Connection failed with status code: {status_code}"} - - @app.post("/embedding/config") - def embedding_config_api(req: LLMConfigRequest): - settings.embedding_type = req.llm_type - - if req.llm_type == "openai": - status_code = apply_embedding_configuration(req.api_key, req.api_base, req.language_model) - elif req.llm_type == "qianfan_wenxin": - status_code = apply_embedding_configuration(req.api_key, req.api_base, None) - else: - status_code = apply_embedding_configuration(req.host, req.port, req.language_model) - - if status_code == -1: - return {"message": "Unsupported HTTP method"} - - if 200 <= status_code < 300: - return {"message": "Connection successful. Configured finished."} - else: - return {"message": f"Connection failed with status code: {status_code}"} - - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--host", type=str, default="0.0.0.0", help="host") @@ -566,7 +474,7 @@ def embedding_config_api(req: LLMConfigRequest): hugegraph_llm = create_hugegraph_llm_interface() - rag_web_http_api() + rag_web_http_api(app, graph_rag, apply_graph_configuration, apply_llm_configuration, apply_embedding_configuration) app = gr.mount_gradio_app(app, hugegraph_llm, path="/") # Note: set reload to False in production environment From 07e552dad9343ba9209258426889b6a037668976 Mon Sep 17 00:00:00 2001 From: imbajin Date: Sat, 17 Aug 2024 19:21:22 +0800 Subject: [PATCH 19/31] clean the func & tiny fix --- .../src/hugegraph_llm/api/__init__ .py | 16 ------- .../src/hugegraph_llm/api/rag_api.py | 18 +++---- .../src/hugegraph_llm/demo/rag_web_demo.py | 48 ++++++++----------- 3 files changed, 29 insertions(+), 53 deletions(-) delete mode 100644 hugegraph-llm/src/hugegraph_llm/api/__init__ .py diff --git a/hugegraph-llm/src/hugegraph_llm/api/__init__ .py b/hugegraph-llm/src/hugegraph_llm/api/__init__ .py deleted file mode 100644 index 13a83393..00000000 --- a/hugegraph-llm/src/hugegraph_llm/api/__init__ .py +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py index aac2a46b..4f90944d 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py @@ -54,8 +54,7 @@ class LLMConfigRequest(BaseModel): port: str = None -def rag_web_http_api(app: FastAPI, graph_rag_func, apply_graph_configuration_func, - apply_llm_configuration_func, apply_embedding_configuration_func): +def rag_http_api(app: FastAPI, graph_rag_func, apply_graph_conf, apply_llm_conf, apply_embedding_conf): @app.post("/rag") def graph_rag_api(req: RAGRequest): result = graph_rag_func(req.query, req.raw_llm, req.vector_only, req.graph_only, req.graph_vector) @@ -65,7 +64,7 @@ def graph_rag_api(req: RAGRequest): @app.post("/graph/config") def graph_config_api(req: GraphConfigRequest): # Accept status code - status_code = apply_graph_configuration_func(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") + status_code = apply_graph_conf(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") if status_code == -1: return {"message": "Unsupported HTTP method"} @@ -80,11 +79,12 @@ def llm_config_api(req: LLMConfigRequest): settings.llm_type = req.llm_type if req.llm_type == "openai": - status_code = apply_llm_configuration_func(req.api_key, req.api_base, req.language_model, req.max_tokens, origin_call="http") + status_code = apply_llm_conf(req.api_key, req.api_base, req.language_model, req.max_tokens, + origin_call="http") elif req.llm_type == "qianfan_wenxin": - status_code = apply_llm_configuration_func(req.api_key, req.secret_key, req.language_model, None, origin_call="http") + status_code = apply_llm_conf(req.api_key, req.secret_key, req.language_model, None, origin_call="http") else: - status_code = apply_llm_configuration_func(req.host, req.port, req.language_model, None, origin_call="http") + status_code = apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http") if status_code == -1: return {"message": "Unsupported HTTP method"} @@ -99,11 +99,11 @@ def embedding_config_api(req: LLMConfigRequest): settings.embedding_type = req.llm_type if req.llm_type == "openai": - status_code = apply_embedding_configuration_func(req.api_key, req.api_base, req.language_model, origin_call="http") + status_code = apply_embedding_conf(req.api_key, req.api_base, req.language_model, origin_call="http") elif req.llm_type == "qianfan_wenxin": - status_code = apply_embedding_configuration_func(req.api_key, req.api_base, None, origin_call="http") + status_code = apply_embedding_conf(req.api_key, req.api_base, None, origin_call="http") else: - status_code = apply_embedding_configuration_func(req.host, req.port, req.language_model, origin_call="http") + status_code = apply_embedding_conf(req.host, req.port, req.language_model, origin_call="http") if status_code == -1: return {"message": "Unsupported HTTP method"} diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py index b93a3169..39cd9619 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py @@ -41,20 +41,11 @@ from hugegraph_llm.utils.log import log from hugegraph_llm.utils.hugegraph_utils import get_hg_client from hugegraph_llm.utils.vector_index_utils import clean_vector_index -from 
hugegraph_llm.api.rag_api import rag_web_http_api +from hugegraph_llm.api.rag_api import rag_http_api -def convert_bool_str(string): - if string == "true": - return True - if string == "false": - return False - raise gr.Error(f"Invalid boolean string: {string}") - - -# TODO: enhance/distinguish the "graph_rag" name to avoid confusion -def graph_rag(text: str, raw_answer: bool, vector_only_answer: bool, - graph_only_answer: bool, graph_vector_answer: bool): +def rag_answer(text: str, raw_answer: bool, vector_only_answer: bool, + graph_only_answer: bool, graph_vector_answer: bool) -> tuple: vector_search = vector_only_answer or graph_vector_answer graph_search = graph_only_answer or graph_vector_answer @@ -89,7 +80,7 @@ def graph_rag(text: str, raw_answer: bool, vector_only_answer: bool, raise gr.Error(f"An unexpected error occurred: {str(e)}") -def build_kg(file, schema, example_prompt, build_mode): # pylint: disable=too-many-branches +def build_kg(file, schema, example_prompt, build_mode) -> str: # pylint: disable=too-many-branches full_path = file.name if full_path.endswith(".txt"): with open(full_path, "r", encoding="utf-8") as f: @@ -142,8 +133,9 @@ def build_kg(file, schema, example_prompt, build_mode): # pylint: disable=too-m log.error(e) raise gr.Error(str(e)) + # todo: origin_call was created to stave off problems with gr.error that needed to be fixed -def test_api_connection(url, method="GET", headers=None, body=None, auth=None, origin_call=None): +def test_api_connection(url, method="GET", headers=None, body=None, auth=None, origin_call=None) -> int: # TODO: use fastapi.request / starlette instead? (Also add a try-catch here) response = None log.debug("Request URL: %s", url) @@ -172,7 +164,7 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None, o return response.status_code -def apply_embedding_configuration(arg1, arg2, arg3, origin_call=None): +def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> int: # Because of ollama, the qianfan_wenxin model is missing the test connect procedure, # so it defaults to 200 so that there is no return value problem status_code = 200 @@ -184,19 +176,19 @@ def apply_embedding_configuration(arg1, arg2, arg3, origin_call=None): test_url = settings.openai_api_base + "/models" headers = {"Authorization": f"Bearer {arg1}"} status_code = test_api_connection(test_url, headers=headers, origin_call=origin_call) + elif embedding_option == "qianfan_wenxin": + settings.qianfan_access_token = arg1 + settings.qianfan_embed_url = arg2 elif embedding_option == "ollama": settings.ollama_host = arg1 settings.ollama_port = int(arg2) settings.ollama_embedding_model = arg3 - elif embedding_option == "qianfan_wenxin": - settings.qianfan_access_token = arg1 - settings.qianfan_embed_url = arg2 settings.update_env() gr.Info("Configured!") return status_code -def apply_graph_configuration(ip, port, name, user, pwd, gs, origin_call=None): +def apply_graph_config(ip, port, name, user, pwd, gs, origin_call=None) -> int: settings.graph_ip = ip settings.graph_port = int(port) settings.graph_name = name @@ -217,7 +209,7 @@ def apply_graph_configuration(ip, port, name, user, pwd, gs, origin_call=None): # Different llm models have different parameters, # so no meaningful argument names are given here -def apply_llm_configuration(arg1, arg2, arg3, arg4, origin_call=None): +def apply_llm_config(arg1, arg2, arg3, arg4, origin_call=None) -> int: llm_option = settings.llm_type # Because of ollama, the qianfan_wenxin model is missing the test 
connect procedure, # so it defaults to 200 so that there is no return value problem @@ -245,7 +237,7 @@ def apply_llm_configuration(arg1, arg2, arg3, arg4, origin_call=None): return status_code -def create_hugegraph_llm_interface(): +def init_rag_ui() -> gr.Interface: with gr.Blocks() as hugegraph_llm: gr.Markdown( """# HugeGraph LLM RAG Demo @@ -264,7 +256,7 @@ def create_hugegraph_llm_interface(): ] graph_config_button = gr.Button("apply configuration") - graph_config_button.click(apply_graph_configuration, inputs=graph_config_input) # pylint: disable=no-member + graph_config_button.click(apply_graph_config, inputs=graph_config_input) # pylint: disable=no-member gr.Markdown("2. Set up the LLM.") llm_dropdown = gr.Dropdown( @@ -307,7 +299,7 @@ def llm_settings(llm_type): llm_config_input = [] llm_config_button = gr.Button("apply configuration") - llm_config_button.click(apply_llm_configuration, inputs=llm_config_input) # pylint: disable=no-member + llm_config_button.click(apply_llm_config, inputs=llm_config_input) # pylint: disable=no-member gr.Markdown("3. Set up the Embedding.") embedding_dropdown = gr.Dropdown( @@ -349,11 +341,11 @@ def embedding_settings(embedding_type): # Call the separate apply_embedding_configuration function here embedding_config_button.click( - lambda arg1, arg2, arg3: apply_embedding_configuration(arg1, arg2, arg3), + lambda arg1, arg2, arg3: apply_embedding_config(arg1, arg2, arg3), inputs=embedding_config_input ) - embedding_config_button.click(apply_embedding_configuration, # pylint: disable=no-member + embedding_config_button.click(apply_embedding_config, # pylint: disable=no-member inputs=embedding_config_input) gr.Markdown( @@ -443,7 +435,7 @@ def embedding_settings(embedding_type): graph_vector_radio = gr.Radio(choices=[True, False], value=False, label="Graph-Vector Answer") btn = gr.Button("Answer Question") - btn.click(fn=graph_rag, + btn.click(fn=rag_answer, inputs=[inp, raw_radio, vector_only_radio, graph_only_radio, # pylint: disable=no-member graph_vector_radio], outputs=[raw_out, vector_only_out, graph_only_out, graph_vector_out]) @@ -472,9 +464,9 @@ def embedding_settings(embedding_type): args = parser.parse_args() app = FastAPI() - hugegraph_llm = create_hugegraph_llm_interface() + hugegraph_llm = init_rag_ui() - rag_web_http_api(app, graph_rag, apply_graph_configuration, apply_llm_configuration, apply_embedding_configuration) + rag_http_api(app, rag_answer, apply_graph_config, apply_llm_config, apply_embedding_config) app = gr.mount_gradio_app(app, hugegraph_llm, path="/") # Note: set reload to False in production environment From aed8986033fb8d6a89a77959fb2e295c701cbd3a Mon Sep 17 00:00:00 2001 From: Hongjun Li Date: Sat, 17 Aug 2024 20:42:59 +0800 Subject: [PATCH 20/31] Merge branch 'fast-api' of https://github.com/ChenZiHong-Gavin/incubator-hugegraph-ai into fast-api --- .../src/hugegraph_llm/api/rag_api.py | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py index 4f90944d..dfe8de18 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py @@ -54,10 +54,10 @@ class LLMConfigRequest(BaseModel): port: str = None -def rag_http_api(app: FastAPI, graph_rag_func, apply_graph_conf, apply_llm_conf, apply_embedding_conf): +def rag_http_api(app: FastAPI, rag_answer_func, apply_graph_conf, apply_llm_conf, apply_embedding_conf): @app.post("/rag") - def graph_rag_api(req: 
RAGRequest): - result = graph_rag_func(req.query, req.raw_llm, req.vector_only, req.graph_only, req.graph_vector) + def rag_answer_api(req: RAGRequest): + result = rag_answer_func(req.query, req.raw_llm, req.vector_only, req.graph_only, req.graph_vector) return {key: value for key, value in zip( ["raw_llm", "vector_only", "graph_only", "graph_vector"], result) if getattr(req, key)} @@ -66,13 +66,14 @@ def graph_config_api(req: GraphConfigRequest): # Accept status code status_code = apply_graph_conf(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") - if status_code == -1: - return {"message": "Unsupported HTTP method"} + return { + "message": ( + "Connection successful. Configured finished." if 200 <= status_code < 300 else + "Unsupported HTTP method" if status_code == -1 else + f"Connection failed with status code: {status_code}" + ) + } - if 200 <= status_code < 300: - return {"message": "Connection successful. Configured finished."} - else: - return {"message": f"Connection failed with status code: {status_code}"} @app.post("/llm/config") def llm_config_api(req: LLMConfigRequest): @@ -86,13 +87,13 @@ def llm_config_api(req: LLMConfigRequest): else: status_code = apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http") - if status_code == -1: - return {"message": "Unsupported HTTP method"} - - if 200 <= status_code < 300: - return {"message": "Connection successful. Configured finished."} - else: - return {"message": f"Connection failed with status code: {status_code}"} + return { + "message": ( + "Connection successful. Configured finished." if 200 <= status_code < 300 else + "Unsupported HTTP method" if status_code == -1 else + f"Connection failed with status code: {status_code}" + ) + } @app.post("/embedding/config") def embedding_config_api(req: LLMConfigRequest): @@ -105,10 +106,10 @@ def embedding_config_api(req: LLMConfigRequest): else: status_code = apply_embedding_conf(req.host, req.port, req.language_model, origin_call="http") - if status_code == -1: - return {"message": "Unsupported HTTP method"} - - if 200 <= status_code < 300: - return {"message": "Connection successful. Configured finished."} - else: - return {"message": f"Connection failed with status code: {status_code}"} + return { + "message": ( + "Connection successful. Configured finished." if 200 <= status_code < 300 else + "Unsupported HTTP method" if status_code == -1 else + f"Connection failed with status code: {status_code}" + ) + } From 93aa51668c491a98adcca0fa59dd10f52b7821d0 Mon Sep 17 00:00:00 2001 From: Hongjun Li Date: Sat, 17 Aug 2024 20:44:34 +0800 Subject: [PATCH 21/31] refactor(rag_api): refactor return && some tiny thing --- hugegraph-llm/src/hugegraph_llm/api/rag_api.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py index dfe8de18..1b09cc2c 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py @@ -65,7 +65,6 @@ def rag_answer_api(req: RAGRequest): def graph_config_api(req: GraphConfigRequest): # Accept status code status_code = apply_graph_conf(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") - return { "message": ( "Connection successful. Configured finished." 
if 200 <= status_code < 300 else
                 "Unsupported HTTP method" if status_code == -1 else
                 f"Connection failed with status code: {status_code}"
             )
         }
 
@@ -86,7 +85,6 @@ def llm_config_api(req: LLMConfigRequest):
             status_code = apply_llm_conf(req.api_key, req.secret_key, req.language_model, None, origin_call="http")
         else:
             status_code = apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http")
-
         return {
             "message": (
                 "Connection successful. Configured finished." if 200 <= status_code < 300 else
                 "Unsupported HTTP method" if status_code == -1 else
                 f"Connection failed with status code: {status_code}"
             )
         }
@@ -105,7 +103,6 @@ def embedding_config_api(req: LLMConfigRequest):
             status_code = apply_embedding_conf(req.api_key, req.api_base, None, origin_call="http")
         else:
             status_code = apply_embedding_conf(req.host, req.port, req.language_model, origin_call="http")
-
         return {
             "message": (
                 "Connection successful. Configured finished." if 200 <= status_code < 300 else
                 "Unsupported HTTP method" if status_code == -1 else
                 f"Connection failed with status code: {status_code}"
             )
         }

From f2c12e6698c94d6956d0bfa3018457f30e0e470f Mon Sep 17 00:00:00 2001
From: Hongjun Li
Date: Sat, 17 Aug 2024 21:09:05 +0800
Subject: [PATCH 22/31] fix(rag_web_demo): unify build_kg func return type

---
 hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 39cd9619..3c295667 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -128,7 +128,7 @@ def build_kg(file, schema, example_prompt, build_mode) -> str:  # pylint: disabl
     log.debug(builder.operators)
     try:
         context = builder.run()
-        return context
+        return str(context)
     except Exception as e:  # pylint: disable=broad-exception-caught
         log.error(e)
         raise gr.Error(str(e))

From b7ea0090afa00b90c8d75cc47c75b27b2a5aeac6 Mon Sep 17 00:00:00 2001
From: chenzihong <522023320011@smail.nju.edu.cn>
Date: Sat, 17 Aug 2024 22:09:06 +0800
Subject: [PATCH 23/31] feat: add try-catch for test connection

---
 .../src/hugegraph_llm/demo/rag_web_demo.py | 29 +++++++++-------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 3c295667..07174dbd 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -134,17 +134,24 @@ def build_kg(file, schema, example_prompt, build_mode) -> str:  # pylint: disabl
     raise gr.Error(str(e))
 
 
-# todo: origin_call was created to stave off problems with gr.error that needed to be fixed
 def test_api_connection(url, method="GET", headers=None, body=None, auth=None, origin_call=None) -> int:
-    # TODO: use fastapi.request / starlette instead? (Also add a try-catch here)
+    # TODO: use fastapi.request / starlette instead? 
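For context on the try-catch this patch adds: requests.get / requests.post raise a subclass of requests.exceptions.RequestException (ConnectionError, Timeout, ...) when the target is unreachable, rather than returning a response object, so without the guard the whole Gradio callback would die with a traceback. A minimal, self-contained illustration (hypothetical URL, not part of the patch):

    import requests

    try:
        # Nothing listens on this port, so the call raises instead of returning.
        requests.get("http://127.0.0.1:9/nothing-listens-here", timeout=5)
    except requests.exceptions.RequestException as e:
        # ConnectionError, Timeout, TooManyRedirects... all derive from RequestException.
        print(f"Connection failed: {e}")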
response = None log.debug("Request URL: %s", url) - if method.upper() == "GET": - response = requests.get(url, headers=headers, timeout=5, auth=auth) - elif method.upper() == "POST": - response = requests.post(url, headers=headers, json=body, timeout=5, auth=auth) - else: - log.error("Unsupported method: %s", method) + try: + if method.upper() == "GET": + response = requests.get(url, headers=headers, timeout=5, auth=auth) + elif method.upper() == "POST": + response = requests.post(url, headers=headers, json=body, timeout=5, auth=auth) + else: + log.error("Unsupported method: %s", method) + return -1 + except requests.exceptions.RequestException as e: + message = f"Connection failed: {e}" + log.error(message) + if origin_call is None: + raise gr.Error(message) + return -1 if response is None: # Unsupported method encountered @@ -157,10 +164,8 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None, o else: message = f"Connection failed with status code: {response.status_code}" log.error(message) - # todo: How to remove raise and gr will render error - # pylint: disable=pointless-exception-statement - if origin_call == None: - raise gr.Error(f"Connection failed with status code: {response.status_code}") + if origin_call is None: + raise gr.Error(message) return response.status_code From ccbfa337eefc0f3f37d735927711f9889bbd6127 Mon Sep 17 00:00:00 2001 From: chenzihong <522023320011@smail.nju.edu.cn> Date: Sat, 17 Aug 2024 22:34:19 +0800 Subject: [PATCH 24/31] feat: use constants for build_mode --- .../src/hugegraph_llm/demo/rag_web_demo.py | 17 ++++++------ .../src/hugegraph_llm/enums/build_mode.py | 26 +++++++++++++++++++ 2 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 hugegraph-llm/src/hugegraph_llm/enums/build_mode.py diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py index 07174dbd..09907746 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py @@ -42,6 +42,7 @@ from hugegraph_llm.utils.hugegraph_utils import get_hg_client from hugegraph_llm.utils.vector_index_utils import clean_vector_index from hugegraph_llm.api.rag_api import rag_http_api +from hugegraph_llm.enums.build_mode import BuildMode def rag_answer(text: str, raw_answer: bool, vector_only_answer: bool, @@ -92,12 +93,13 @@ def build_kg(file, schema, example_prompt, build_mode) -> str: # pylint: disabl text += para.text text += "\n" elif full_path.endswith(".pdf"): + # TODO: support PDF file raise gr.Error("PDF will be supported later! Try to upload text/docx now") else: raise gr.Error("Please input txt or docx file.") builder = KgBuilder(LLMs().get_llm(), Embeddings().get_embedding(), get_hg_client()) - if build_mode != "Rebuild vertex index": + if build_mode != BuildMode.REBUILD_VERTEX_INDEX: if schema: try: schema = json.loads(schema.strip()) @@ -109,21 +111,20 @@ def build_kg(file, schema, example_prompt, build_mode) -> str: # pylint: disabl return "ERROR: please input schema." 
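One subtlety in the hunk above: build_mode arrives from the Gradio radio as a plain string, and in Python a str never compares equal to an Enum member, so build_mode != BuildMode.REBUILD_VERTEX_INDEX is always True at this point; patch 26 below ("fix: compare constant values") corrects this by comparing against .value. A minimal illustration of the pitfall:

    from enum import Enum

    class BuildMode(Enum):
        TEST_MODE = "Test Mode"

    print("Test Mode" == BuildMode.TEST_MODE)        # False: str vs. Enum member
    print("Test Mode" == BuildMode.TEST_MODE.value)  # True: compare the payload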
builder.chunk_split(text, "paragraph", "zh") - # TODO: avoid hardcoding the "build_mode" strings (use var/constant instead) - if build_mode == "Rebuild Vector": + if build_mode == BuildMode.REBUILD_VECTOR: builder.fetch_graph_data() else: builder.extract_info(example_prompt, "property_graph") # "Test Mode", "Import Mode", "Clear and Import", "Rebuild Vector" - if build_mode != "Test Mode": - if build_mode in ("Clear and Import", "Rebuild Vector"): + if build_mode != BuildMode.TEST_MODE: + if build_mode in (BuildMode.CLEAR_AND_IMPORT, BuildMode.REBUILD_VECTOR): clean_vector_index() builder.build_vector_index() - if build_mode == "Clear and Import": + if build_mode == BuildMode.CLEAR_AND_IMPORT: clean_hg_data() - if build_mode in ("Clear and Import", "Import Mode"): + if build_mode in (BuildMode.CLEAR_AND_IMPORT, BuildMode.IMPORT_MODE): builder.commit_to_hugegraph() - if build_mode != "Test Mode": + if build_mode != BuildMode.TEST_MODE: builder.build_vertex_id_semantic_index() log.debug(builder.operators) try: diff --git a/hugegraph-llm/src/hugegraph_llm/enums/build_mode.py b/hugegraph-llm/src/hugegraph_llm/enums/build_mode.py new file mode 100644 index 00000000..1dd6eea5 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/enums/build_mode.py @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from enum import Enum + +class BuildMode(Enum): + REBUILD_VECTOR = "Rebuild Vector" + TEST_MODE = "Test Mode" + IMPORT_MODE = "Import Mode" + CLEAR_AND_IMPORT = "Clear and Import" + REBUILD_VERTEX_INDEX = "Rebuild vertex index" From 83950b9cc2c8dd16a1545237d4df91a717781931 Mon Sep 17 00:00:00 2001 From: Hongjun Li Date: Sun, 18 Aug 2024 05:12:07 +0800 Subject: [PATCH 25/31] style(rag_web_demo,rag_api): use code_format_and_analysis.sh format code --- .../src/hugegraph_llm/api/rag_api.py | 45 ++++++--- .../src/hugegraph_llm/demo/rag_web_demo.py | 95 ++++++++----------- 2 files changed, 72 insertions(+), 68 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py index 1b09cc2c..d45599ac 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py @@ -40,7 +40,7 @@ class GraphConfigRequest(BaseModel): class LLMConfigRequest(BaseModel): llm_type: str - # The common parameters shared by OpenAI, Qianfan Wenxin, + # The common parameters shared by OpenAI, Qianfan Wenxin, # and OLLAMA platforms. 
api_key: str api_base: str @@ -58,8 +58,11 @@ def rag_http_api(app: FastAPI, rag_answer_func, apply_graph_conf, apply_llm_conf @app.post("/rag") def rag_answer_api(req: RAGRequest): result = rag_answer_func(req.query, req.raw_llm, req.vector_only, req.graph_only, req.graph_vector) - return {key: value for key, value in zip( - ["raw_llm", "vector_only", "graph_only", "graph_vector"], result) if getattr(req, key)} + return { + key: value + for key, value in zip(["raw_llm", "vector_only", "graph_only", "graph_vector"], result) + if getattr(req, key) + } @app.post("/graph/config") def graph_config_api(req: GraphConfigRequest): @@ -67,29 +70,37 @@ def graph_config_api(req: GraphConfigRequest): status_code = apply_graph_conf(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") return { "message": ( - "Connection successful. Configured finished." if 200 <= status_code < 300 else - "Unsupported HTTP method" if status_code == -1 else - f"Connection failed with status code: {status_code}" + "Connection successful. Configured finished." + if 200 <= status_code < 300 + else ( + "Unsupported HTTP method" + if status_code == -1 + else f"Connection failed with status code: {status_code}" + ) ) } - @app.post("/llm/config") def llm_config_api(req: LLMConfigRequest): settings.llm_type = req.llm_type if req.llm_type == "openai": - status_code = apply_llm_conf(req.api_key, req.api_base, req.language_model, req.max_tokens, - origin_call="http") + status_code = apply_llm_conf( + req.api_key, req.api_base, req.language_model, req.max_tokens, origin_call="http" + ) elif req.llm_type == "qianfan_wenxin": status_code = apply_llm_conf(req.api_key, req.secret_key, req.language_model, None, origin_call="http") else: status_code = apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http") return { "message": ( - "Connection successful. Configured finished." if 200 <= status_code < 300 else - "Unsupported HTTP method" if status_code == -1 else - f"Connection failed with status code: {status_code}" + "Connection successful. Configured finished." + if 200 <= status_code < 300 + else ( + "Unsupported HTTP method" + if status_code == -1 + else f"Connection failed with status code: {status_code}" + ) ) } @@ -105,8 +116,12 @@ def embedding_config_api(req: LLMConfigRequest): status_code = apply_embedding_conf(req.host, req.port, req.language_model, origin_call="http") return { "message": ( - "Connection successful. Configured finished." if 200 <= status_code < 300 else - "Unsupported HTTP method" if status_code == -1 else - f"Connection failed with status code: {status_code}" + "Connection successful. Configured finished." 
+ if 200 <= status_code < 300 + else ( + "Unsupported HTTP method" + if status_code == -1 + else f"Connection failed with status code: {status_code}" + ) ) } diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py index 09907746..757c2a7b 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py @@ -33,11 +33,7 @@ from hugegraph_llm.operators.kg_construction_task import KgBuilder from hugegraph_llm.config import settings, resource_path from hugegraph_llm.operators.llm_op.property_graph_extract import SCHEMA_EXAMPLE_PROMPT -from hugegraph_llm.utils.hugegraph_utils import ( - init_hg_test_data, - run_gremlin_query, - clean_hg_data -) +from hugegraph_llm.utils.hugegraph_utils import init_hg_test_data, run_gremlin_query, clean_hg_data from hugegraph_llm.utils.log import log from hugegraph_llm.utils.hugegraph_utils import get_hg_client from hugegraph_llm.utils.vector_index_utils import clean_vector_index @@ -45,8 +41,9 @@ from hugegraph_llm.enums.build_mode import BuildMode -def rag_answer(text: str, raw_answer: bool, vector_only_answer: bool, - graph_only_answer: bool, graph_vector_answer: bool) -> tuple: +def rag_answer( + text: str, raw_answer: bool, vector_only_answer: bool, graph_only_answer: bool, graph_vector_answer: bool +) -> tuple: vector_search = vector_only_answer or graph_vector_answer graph_search = graph_only_answer or graph_vector_answer @@ -62,7 +59,7 @@ def rag_answer(text: str, raw_answer: bool, vector_only_answer: bool, raw_answer=raw_answer, vector_only_answer=vector_only_answer, graph_only_answer=graph_only_answer, - graph_vector_answer=graph_vector_answer + graph_vector_answer=graph_vector_answer, ).run(verbose=True, query=text) try: @@ -71,7 +68,7 @@ def rag_answer(text: str, raw_answer: bool, vector_only_answer: bool, context.get("raw_answer", ""), context.get("vector_only_answer", ""), context.get("graph_only_answer", ""), - context.get("graph_vector_answer", "") + context.get("graph_vector_answer", ""), ) except ValueError as e: log.error(e) @@ -265,11 +262,7 @@ def init_rag_ui() -> gr.Interface: graph_config_button.click(apply_graph_config, inputs=graph_config_input) # pylint: disable=no-member gr.Markdown("2. 
Set up the LLM.") - llm_dropdown = gr.Dropdown( - choices=["openai", "qianfan_wenxin", "ollama"], - value=settings.llm_type, - label="LLM" - ) + llm_dropdown = gr.Dropdown(choices=["openai", "qianfan_wenxin", "ollama"], value=settings.llm_type, label="LLM") @gr.render(inputs=[llm_dropdown]) def llm_settings(llm_type): @@ -288,17 +281,15 @@ def llm_settings(llm_type): gr.Textbox(value=settings.ollama_host, label="host"), gr.Textbox(value=str(settings.ollama_port), label="port"), gr.Textbox(value=settings.ollama_language_model, label="model_name"), - gr.Textbox(value="", visible=False) + gr.Textbox(value="", visible=False), ] elif llm_type == "qianfan_wenxin": with gr.Row(): llm_config_input = [ - gr.Textbox(value=settings.qianfan_api_key, label="api_key", - type="password"), - gr.Textbox(value=settings.qianfan_secret_key, label="secret_key", - type="password"), + gr.Textbox(value=settings.qianfan_api_key, label="api_key", type="password"), + gr.Textbox(value=settings.qianfan_secret_key, label="secret_key", type="password"), gr.Textbox(value=settings.qianfan_language_model, label="model_name"), - gr.Textbox(value="", visible=False) + gr.Textbox(value="", visible=False), ] log.debug(llm_config_input) else: @@ -309,9 +300,7 @@ def llm_settings(llm_type): gr.Markdown("3. Set up the Embedding.") embedding_dropdown = gr.Dropdown( - choices=["openai", "ollama", "qianfan_wenxin"], - value=settings.embedding_type, - label="Embedding" + choices=["openai", "ollama", "qianfan_wenxin"], value=settings.embedding_type, label="Embedding" ) @gr.render(inputs=[embedding_dropdown]) @@ -322,15 +311,13 @@ def embedding_settings(embedding_type): embedding_config_input = [ gr.Textbox(value=settings.openai_api_key, label="api_key", type="password"), gr.Textbox(value=settings.openai_api_base, label="api_base"), - gr.Textbox(value=settings.openai_embedding_model, label="model_name") + gr.Textbox(value=settings.openai_embedding_model, label="model_name"), ] elif embedding_type == "qianfan_wenxin": with gr.Row(): embedding_config_input = [ - gr.Textbox(value=settings.qianfan_api_key, label="api_key", - type="password"), - gr.Textbox(value=settings.qianfan_secret_key, label="secret_key", - type="password"), + gr.Textbox(value=settings.qianfan_api_key, label="api_key", type="password"), + gr.Textbox(value=settings.qianfan_secret_key, label="secret_key", type="password"), gr.Textbox(value=settings.qianfan_embedding_model, label="model_name"), ] elif embedding_type == "ollama": @@ -347,12 +334,12 @@ def embedding_settings(embedding_type): # Call the separate apply_embedding_configuration function here embedding_config_button.click( - lambda arg1, arg2, arg3: apply_embedding_config(arg1, arg2, arg3), - inputs=embedding_config_input + lambda arg1, arg2, arg3: apply_embedding_config(arg1, arg2, arg3), inputs=embedding_config_input ) - embedding_config_button.click(apply_embedding_config, # pylint: disable=no-member - inputs=embedding_config_input) + embedding_config_button.click( + apply_embedding_config, inputs=embedding_config_input # pylint: disable=no-member + ) gr.Markdown( """## 1. 
Build vector/graph RAG (💡) @@ -406,21 +393,20 @@ def embedding_settings(embedding_type): }""" with gr.Row(): - input_file = gr.File(value=os.path.join(resource_path, "demo", "test.txt"), - label="Document") + input_file = gr.File(value=os.path.join(resource_path, "demo", "test.txt"), label="Document") input_schema = gr.Textbox(value=SCHEMA, label="Schema") - info_extract_template = gr.Textbox(value=SCHEMA_EXAMPLE_PROMPT, - label="Info extract head") + info_extract_template = gr.Textbox(value=SCHEMA_EXAMPLE_PROMPT, label="Info extract head") with gr.Column(): - mode = gr.Radio(choices=["Test Mode", "Import Mode", "Clear and Import", "Rebuild Vector"], - value="Test Mode", label="Build mode") + mode = gr.Radio( + choices=["Test Mode", "Import Mode", "Clear and Import", "Rebuild Vector"], + value="Test Mode", + label="Build mode", + ) btn = gr.Button("Build Vector/Graph RAG") with gr.Row(): out = gr.Textbox(label="Output", show_copy_button=True) btn.click( # pylint: disable=no-member - fn=build_kg, - inputs=[input_file, input_schema, info_extract_template, mode], - outputs=out + fn=build_kg, inputs=[input_file, input_schema, info_extract_template, mode], outputs=out ) gr.Markdown("""## 2. RAG with HugeGraph 📖""") @@ -432,19 +418,22 @@ def embedding_settings(embedding_type): graph_only_out = gr.Textbox(label="Graph-only Answer", show_copy_button=True) graph_vector_out = gr.Textbox(label="Graph-Vector Answer", show_copy_button=True) with gr.Column(scale=1): - raw_radio = gr.Radio(choices=[True, False], value=True, - label="Basic LLM Answer") - vector_only_radio = gr.Radio(choices=[True, False], value=False, - label="Vector-only Answer") - graph_only_radio = gr.Radio(choices=[True, False], value=False, - label="Graph-only Answer") - graph_vector_radio = gr.Radio(choices=[True, False], value=False, - label="Graph-Vector Answer") + raw_radio = gr.Radio(choices=[True, False], value=True, label="Basic LLM Answer") + vector_only_radio = gr.Radio(choices=[True, False], value=False, label="Vector-only Answer") + graph_only_radio = gr.Radio(choices=[True, False], value=False, label="Graph-only Answer") + graph_vector_radio = gr.Radio(choices=[True, False], value=False, label="Graph-Vector Answer") btn = gr.Button("Answer Question") - btn.click(fn=rag_answer, - inputs=[inp, raw_radio, vector_only_radio, graph_only_radio, # pylint: disable=no-member - graph_vector_radio], - outputs=[raw_out, vector_only_out, graph_only_out, graph_vector_out]) + btn.click( + fn=rag_answer, + inputs=[ + inp, + raw_radio, + vector_only_radio, + graph_only_radio, # pylint: disable=no-member + graph_vector_radio, + ], + outputs=[raw_out, vector_only_out, graph_only_out, graph_vector_out], + ) gr.Markdown("""## 3. 
Others (🚧) """) with gr.Row(): From ec2f8e0c707b1079a446b9bd4501a0406a00abc8 Mon Sep 17 00:00:00 2001 From: chenzihong <522023320011@smail.nju.edu.cn> Date: Sun, 18 Aug 2024 11:11:50 +0800 Subject: [PATCH 26/31] fix: compare constant values --- .../src/hugegraph_llm/demo/rag_web_demo.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py index 757c2a7b..8eda2eb9 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py @@ -96,7 +96,7 @@ def build_kg(file, schema, example_prompt, build_mode) -> str: # pylint: disabl raise gr.Error("Please input txt or docx file.") builder = KgBuilder(LLMs().get_llm(), Embeddings().get_embedding(), get_hg_client()) - if build_mode != BuildMode.REBUILD_VERTEX_INDEX: + if build_mode != BuildMode.REBUILD_VERTEX_INDEX.value: if schema: try: schema = json.loads(schema.strip()) @@ -108,20 +108,20 @@ def build_kg(file, schema, example_prompt, build_mode) -> str: # pylint: disabl return "ERROR: please input schema." builder.chunk_split(text, "paragraph", "zh") - if build_mode == BuildMode.REBUILD_VECTOR: + if build_mode == BuildMode.REBUILD_VECTOR.value: builder.fetch_graph_data() else: builder.extract_info(example_prompt, "property_graph") # "Test Mode", "Import Mode", "Clear and Import", "Rebuild Vector" - if build_mode != BuildMode.TEST_MODE: - if build_mode in (BuildMode.CLEAR_AND_IMPORT, BuildMode.REBUILD_VECTOR): + if build_mode != BuildMode.TEST_MODE.value: + if build_mode in (BuildMode.CLEAR_AND_IMPORT.value, BuildMode.REBUILD_VECTOR.value): clean_vector_index() builder.build_vector_index() - if build_mode == BuildMode.CLEAR_AND_IMPORT: + if build_mode == BuildMode.CLEAR_AND_IMPORT.value: clean_hg_data() - if build_mode in (BuildMode.CLEAR_AND_IMPORT, BuildMode.IMPORT_MODE): + if build_mode in (BuildMode.CLEAR_AND_IMPORT.value, BuildMode.IMPORT_MODE.value): builder.commit_to_hugegraph() - if build_mode != BuildMode.TEST_MODE: + if build_mode != BuildMode.TEST_MODE.value: builder.build_vertex_id_semantic_index() log.debug(builder.operators) try: From c2f91bcf95c065af04f284ee3ce06fdb997e4785 Mon Sep 17 00:00:00 2001 From: Hongjun Li Date: Sun, 18 Aug 2024 21:05:00 +0800 Subject: [PATCH 27/31] feat(rag_api): Split wrapper api --- .../hugegraph_llm/api/exceptions/__init__.py | 16 +++ .../api/exceptions/rag_exceptions.py | 34 +++++++ .../src/hugegraph_llm/api/models/__init__.py | 16 +++ .../hugegraph_llm/api/models/rag_requests.py | 52 ++++++++++ .../hugegraph_llm/api/models/rag_response.py | 22 +++++ .../src/hugegraph_llm/api/rag_api.py | 99 ++++--------------- .../src/hugegraph_llm/demo/rag_web_demo.py | 71 ++++++++----- 7 files changed, 204 insertions(+), 106 deletions(-) create mode 100644 hugegraph-llm/src/hugegraph_llm/api/exceptions/__init__.py create mode 100644 hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py create mode 100644 hugegraph-llm/src/hugegraph_llm/api/models/__init__.py create mode 100644 hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py create mode 100644 hugegraph-llm/src/hugegraph_llm/api/models/rag_response.py diff --git a/hugegraph-llm/src/hugegraph_llm/api/exceptions/__init__.py b/hugegraph-llm/src/hugegraph_llm/api/exceptions/__init__.py new file mode 100644 index 00000000..13a83393 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/api/exceptions/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software 
Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py b/hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py new file mode 100644 index 00000000..40bdb561 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from fastapi import HTTPException +from hugegraph_llm.api.models.rag_response import RAGResponse + +class UnsupportedMethodException(HTTPException): + def __init__(self): + super().__init__(status_code=400, detail="Connection failed with error code: -1") + +class ConnectionFailedException(HTTPException): + def __init__(self, status_code: int, message: str): + super().__init__(status_code=status_code, detail=message) + +def generate_response(response: RAGResponse) -> dict: + if response.status_code == -1: + raise UnsupportedMethodException() + elif not (200 <= response.status_code < 300): + raise ConnectionFailedException(response.status_code, response.message) + return {"message": "Connection successful. Configured finished."} \ No newline at end of file diff --git a/hugegraph-llm/src/hugegraph_llm/api/models/__init__.py b/hugegraph-llm/src/hugegraph_llm/api/models/__init__.py new file mode 100644 index 00000000..13a83393 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/api/models/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py new file mode 100644 index 00000000..fd77a222 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pydantic import BaseModel +from typing import Optional + + +class RAGRequest(BaseModel): + query: str + raw_llm: Optional[bool] = True + vector_only: Optional[bool] = False + graph_only: Optional[bool] = False + graph_vector: Optional[bool] = False + + +class GraphConfigRequest(BaseModel): + ip: str = "127.0.0.1" + port: str = "8080" + name: str = "hugegraph" + user: str = "xxx" + pwd: str = "xxx" + gs: str = None + + +class LLMConfigRequest(BaseModel): + llm_type: str + # The common parameters shared by OpenAI, Qianfan Wenxin, + # and OLLAMA platforms. + api_key: str + api_base: str + language_model: str + # Openai-only properties + max_tokens: str = None + # qianfan-wenxin-only properties + secret_key: str = None + # ollama-only properties + host: str = None + port: str = None \ No newline at end of file diff --git a/hugegraph-llm/src/hugegraph_llm/api/models/rag_response.py b/hugegraph-llm/src/hugegraph_llm/api/models/rag_response.py new file mode 100644 index 00000000..720777c2 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/api/models/rag_response.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pydantic import BaseModel + +class RAGResponse(BaseModel): + status_code: int = -1 + message: str = "" \ No newline at end of file diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py index d45599ac..236c52b6 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py @@ -15,47 +15,14 @@ # specific language governing permissions and limitations # under the License. -from fastapi import FastAPI -from pydantic import BaseModel -from typing import Optional +from fastapi import FastAPI, status from hugegraph_llm.config import settings - - -class RAGRequest(BaseModel): - query: str - raw_llm: Optional[bool] = True - vector_only: Optional[bool] = False - graph_only: Optional[bool] = False - graph_vector: Optional[bool] = False - - -class GraphConfigRequest(BaseModel): - ip: str = "127.0.0.1" - port: str = "8080" - name: str = "hugegraph" - user: str = "xxx" - pwd: str = "xxx" - gs: str = None - - -class LLMConfigRequest(BaseModel): - llm_type: str - # The common parameters shared by OpenAI, Qianfan Wenxin, - # and OLLAMA platforms. - api_key: str - api_base: str - language_model: str - # Openai-only properties - max_tokens: str = None - # qianfan-wenxin-only properties - secret_key: str = None - # ollama-only properties - host: str = None - port: str = None +from hugegraph_llm.api.models.rag_requests import RAGRequest, GraphConfigRequest, LLMConfigRequest +from hugegraph_llm.api.exceptions.rag_exceptions import generate_response def rag_http_api(app: FastAPI, rag_answer_func, apply_graph_conf, apply_llm_conf, apply_embedding_conf): - @app.post("/rag") + @app.post("/rag", status_code=status.HTTP_200_OK) def rag_answer_api(req: RAGRequest): result = rag_answer_func(req.query, req.raw_llm, req.vector_only, req.graph_only, req.graph_vector) return { @@ -64,64 +31,36 @@ def rag_answer_api(req: RAGRequest): if getattr(req, key) } - @app.post("/graph/config") + @app.post("/config/graph", status_code=status.HTTP_201_CREATED) def graph_config_api(req: GraphConfigRequest): # Accept status code - status_code = apply_graph_conf(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") - return { - "message": ( - "Connection successful. Configured finished." - if 200 <= status_code < 300 - else ( - "Unsupported HTTP method" - if status_code == -1 - else f"Connection failed with status code: {status_code}" - ) - ) - } + response = apply_graph_conf(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") + return generate_response(response) - @app.post("/llm/config") + @app.post("/config/llm", status_code=status.HTTP_201_CREATED) def llm_config_api(req: LLMConfigRequest): settings.llm_type = req.llm_type if req.llm_type == "openai": - status_code = apply_llm_conf( + response = apply_llm_conf( req.api_key, req.api_base, req.language_model, req.max_tokens, origin_call="http" ) elif req.llm_type == "qianfan_wenxin": - status_code = apply_llm_conf(req.api_key, req.secret_key, req.language_model, None, origin_call="http") + response = apply_llm_conf(req.api_key, req.secret_key, req.language_model, None, origin_call="http") else: - status_code = apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http") - return { - "message": ( - "Connection successful. Configured finished." 
- if 200 <= status_code < 300 - else ( - "Unsupported HTTP method" - if status_code == -1 - else f"Connection failed with status code: {status_code}" - ) - ) - } + response = apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http") + + print(response) + return generate_response(response) - @app.post("/embedding/config") + @app.post("/config/embedding", status_code=status.HTTP_201_CREATED) def embedding_config_api(req: LLMConfigRequest): settings.embedding_type = req.llm_type if req.llm_type == "openai": - status_code = apply_embedding_conf(req.api_key, req.api_base, req.language_model, origin_call="http") + response = apply_embedding_conf(req.api_key, req.api_base, req.language_model, origin_call="http") elif req.llm_type == "qianfan_wenxin": - status_code = apply_embedding_conf(req.api_key, req.api_base, None, origin_call="http") + response = apply_embedding_conf(req.api_key, req.api_base, None, origin_call="http") else: - status_code = apply_embedding_conf(req.host, req.port, req.language_model, origin_call="http") - return { - "message": ( - "Connection successful. Configured finished." - if 200 <= status_code < 300 - else ( - "Unsupported HTTP method" - if status_code == -1 - else f"Connection failed with status code: {status_code}" - ) - ) - } + response = apply_embedding_conf(req.host, req.port, req.language_model, origin_call="http") + return generate_response(response) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py index 8eda2eb9..4302f64f 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py @@ -38,6 +38,7 @@ from hugegraph_llm.utils.hugegraph_utils import get_hg_client from hugegraph_llm.utils.vector_index_utils import clean_vector_index from hugegraph_llm.api.rag_api import rag_http_api +from hugegraph_llm.api.models.rag_response import RAGResponse from hugegraph_llm.enums.build_mode import BuildMode @@ -132,9 +133,12 @@ def build_kg(file, schema, example_prompt, build_mode) -> str: # pylint: disabl raise gr.Error(str(e)) -def test_api_connection(url, method="GET", headers=None, body=None, auth=None, origin_call=None) -> int: +def test_api_connection(url, method="GET", headers=None, body=None, auth=None, origin_call=None) -> RAGResponse: # TODO: use fastapi.request / starlette instead? response = None + + return_dict = RAGResponse() + log.debug("Request URL: %s", url) try: if method.upper() == "GET": @@ -143,34 +147,47 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None, o response = requests.post(url, headers=headers, json=body, timeout=5, auth=auth) else: log.error("Unsupported method: %s", method) - return -1 except requests.exceptions.RequestException as e: message = f"Connection failed: {e}" log.error(message) if origin_call is None: raise gr.Error(message) - return -1 + return return_dict if response is None: # Unsupported method encountered - return -1 + if origin_call is None: + raise gr.Error("Connection failed with error code: -1") + return return_dict if 200 <= response.status_code < 300: message = "Connection successful. Configured finished." 
log.info(message) gr.Info(message) + return_dict.status_code = response.status_code + return_dict.message = message else: message = f"Connection failed with status code: {response.status_code}" log.error(message) - if origin_call is None: - raise gr.Error(message) - return response.status_code - - -def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> int: - # Because of ollama, the qianfan_wenxin model is missing the test connect procedure, + try: + # TODO: Feedback on graph name errors alone, but needs to be modified + if origin_call is None: + raise gr.Error(json.loads(response.text)["message"]) + return_dict.status_code = response.status_code + return_dict.message = json.loads(response.text)["message"] + return return_dict + # TODO: The problem is that only the message returned by rag can be processed, and the other return values are not processed + except json.JSONDecodeError as e: + if origin_call is None: + raise gr.Error(message) + return_dict.status_code = response.status_code + return_dict.message = message + return return_dict + + +def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> RAGResponse: + # TODO:Because of ollama, the qianfan_wenxin model is missing the test connect procedure, # so it defaults to 200 so that there is no return value problem - status_code = 200 embedding_option = settings.embedding_type if embedding_option == "openai": settings.openai_api_key = arg1 @@ -178,20 +195,24 @@ def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> int: settings.openai_embedding_model = arg3 test_url = settings.openai_api_base + "/models" headers = {"Authorization": f"Bearer {arg1}"} - status_code = test_api_connection(test_url, headers=headers, origin_call=origin_call) + response = test_api_connection(test_url, headers=headers, origin_call=origin_call) elif embedding_option == "qianfan_wenxin": settings.qianfan_access_token = arg1 settings.qianfan_embed_url = arg2 + # TODO: add test connection + response = RAGResponse(status_code=200) elif embedding_option == "ollama": settings.ollama_host = arg1 settings.ollama_port = int(arg2) settings.ollama_embedding_model = arg3 + # TODO: add test connection + response = RAGResponse(status_code=200) settings.update_env() gr.Info("Configured!") - return status_code + return response -def apply_graph_config(ip, port, name, user, pwd, gs, origin_call=None) -> int: +def apply_graph_config(ip, port, name, user, pwd, gs, origin_call=None) -> RAGResponse: settings.graph_ip = ip settings.graph_port = int(port) settings.graph_name = name @@ -205,18 +226,17 @@ def apply_graph_config(ip, port, name, user, pwd, gs, origin_call=None) -> int: test_url = f"http://{ip}:{port}/graphs/{name}/schema" auth = HTTPBasicAuth(user, pwd) # for http api return status - status_code = test_api_connection(test_url, auth=auth, origin_call=origin_call) + response = test_api_connection(test_url, auth=auth, origin_call=origin_call) settings.update_env() - return status_code + return response # Different llm models have different parameters, # so no meaningful argument names are given here -def apply_llm_config(arg1, arg2, arg3, arg4, origin_call=None) -> int: +def apply_llm_config(arg1, arg2, arg3, arg4, origin_call=None) -> RAGResponse: llm_option = settings.llm_type # Because of ollama, the qianfan_wenxin model is missing the test connect procedure, # so it defaults to 200 so that there is no return value problem - status_code = 200 if llm_option == "openai": settings.openai_api_key = arg1 settings.openai_api_base = arg2 @@ -224,20 
+244,23 @@ def apply_llm_config(arg1, arg2, arg3, arg4, origin_call=None) -> int: settings.openai_max_tokens = int(arg4) test_url = settings.openai_api_base + "/models" headers = {"Authorization": f"Bearer {arg1}"} - status_code = test_api_connection(test_url, headers=headers, origin_call=origin_call) + response = test_api_connection(test_url, headers=headers, origin_call=origin_call) elif llm_option == "qianfan_wenxin": settings.qianfan_api_key = arg1 settings.qianfan_secret_key = arg2 settings.qianfan_language_model = arg3 - # TODO: test the connection + # TODO: add test connection + response = RAGResponse(status_code=200) # test_url = "https://aip.baidubce.com/oauth/2.0/token" # POST elif llm_option == "ollama": settings.ollama_host = arg1 settings.ollama_port = int(arg2) settings.ollama_language_model = arg3 + # TODO: add test connection + response = RAGResponse(status_code=200) gr.Info("Configured!") settings.update_env() - return status_code + return response def init_rag_ui() -> gr.Interface: @@ -333,10 +356,6 @@ def embedding_settings(embedding_type): embedding_config_button = gr.Button("apply configuration") # Call the separate apply_embedding_configuration function here - embedding_config_button.click( - lambda arg1, arg2, arg3: apply_embedding_config(arg1, arg2, arg3), inputs=embedding_config_input - ) - embedding_config_button.click( apply_embedding_config, inputs=embedding_config_input # pylint: disable=no-member ) From 245583b7240955a67cf0a2a95237b7b7a27f873e Mon Sep 17 00:00:00 2001 From: Hongjun Li Date: Sun, 18 Aug 2024 21:30:56 +0800 Subject: [PATCH 28/31] fix(rag_web_demo): fix test_api_connection --- hugegraph-llm/src/hugegraph_llm/api/rag_api.py | 2 -- hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py | 8 +++----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py index 236c52b6..fa7c93ec 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py @@ -49,8 +49,6 @@ def llm_config_api(req: LLMConfigRequest): response = apply_llm_conf(req.api_key, req.secret_key, req.language_model, None, origin_call="http") else: response = apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http") - - print(response) return generate_response(response) @app.post("/config/embedding", status_code=status.HTTP_201_CREATED) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py index 4302f64f..e37d973e 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py @@ -136,7 +136,6 @@ def build_kg(file, schema, example_prompt, build_mode) -> str: # pylint: disabl def test_api_connection(url, method="GET", headers=None, body=None, auth=None, origin_call=None) -> RAGResponse: # TODO: use fastapi.request / starlette instead? response = None - return_dict = RAGResponse() log.debug("Request URL: %s", url) @@ -153,19 +152,18 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None, o if origin_call is None: raise gr.Error(message) return return_dict - if response is None: # Unsupported method encountered if origin_call is None: raise gr.Error("Connection failed with error code: -1") return return_dict - if 200 <= response.status_code < 300: message = "Connection successful. Configured finished." 
log.info(message) gr.Info(message) return_dict.status_code = response.status_code return_dict.message = message + return return_dict else: message = f"Connection failed with status code: {response.status_code}" log.error(message) @@ -250,14 +248,14 @@ def apply_llm_config(arg1, arg2, arg3, arg4, origin_call=None) -> RAGResponse: settings.qianfan_secret_key = arg2 settings.qianfan_language_model = arg3 # TODO: add test connection - response = RAGResponse(status_code=200) + response = RAGResponse(status_code=200, message="") # test_url = "https://aip.baidubce.com/oauth/2.0/token" # POST elif llm_option == "ollama": settings.ollama_host = arg1 settings.ollama_port = int(arg2) settings.ollama_language_model = arg3 # TODO: add test connection - response = RAGResponse(status_code=200) + response = RAGResponse(status_code=200, message="") gr.Info("Configured!") settings.update_env() return response From 6cb562b7f465d66f73c180c33425d55147322ab6 Mon Sep 17 00:00:00 2001 From: imbajin Date: Sun, 18 Aug 2024 21:54:52 +0800 Subject: [PATCH 29/31] Update rag_web_demo.py --- .../src/hugegraph_llm/demo/rag_web_demo.py | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py index 4302f64f..4c2c96d2 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py @@ -43,7 +43,7 @@ def rag_answer( - text: str, raw_answer: bool, vector_only_answer: bool, graph_only_answer: bool, graph_vector_answer: bool + text: str, raw_answer: bool, vector_only_answer: bool, graph_only_answer: bool, graph_vector_answer: bool ) -> tuple: vector_search = vector_only_answer or graph_vector_answer graph_search = graph_only_answer or graph_vector_answer @@ -133,20 +133,20 @@ def build_kg(file, schema, example_prompt, build_mode) -> str: # pylint: disabl raise gr.Error(str(e)) -def test_api_connection(url, method="GET", headers=None, body=None, auth=None, origin_call=None) -> RAGResponse: +def test_api_connection(url, method="GET", + headers=None, params=None, body=None, auth=None, origin_call=None) -> RAGResponse: # TODO: use fastapi.request / starlette instead? response = None - - return_dict = RAGResponse() + global return_dict log.debug("Request URL: %s", url) try: if method.upper() == "GET": - response = requests.get(url, headers=headers, timeout=5, auth=auth) + response = requests.get(url, headers=headers, params=params, timeout=5, auth=auth) elif method.upper() == "POST": - response = requests.post(url, headers=headers, json=body, timeout=5, auth=auth) + response = requests.post(url, headers=headers, params=params, json=body, timeout=5, auth=auth) else: - log.error("Unsupported method: %s", method) + raise gr.Error("Unsupported method") except requests.exceptions.RequestException as e: message = f"Connection failed: {e}" log.error(message) @@ -154,12 +154,6 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None, o raise gr.Error(message) return return_dict - if response is None: - # Unsupported method encountered - if origin_call is None: - raise gr.Error("Connection failed with error code: -1") - return return_dict - if 200 <= response.status_code < 300: message = "Connection successful. Configured finished." 
log.info(message) @@ -176,7 +170,7 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None, o return_dict.status_code = response.status_code return_dict.message = json.loads(response.text)["message"] return return_dict - # TODO: The problem is that only the message returned by rag can be processed, and the other return values are not processed + # TODO: Only the message returned by rag can be processed, and the other return values can't be processed except json.JSONDecodeError as e: if origin_call is None: raise gr.Error(message) @@ -188,6 +182,7 @@ def test_api_connection(url, method="GET", headers=None, body=None, auth=None, o def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> RAGResponse: # TODO:Because of ollama, the qianfan_wenxin model is missing the test connect procedure, # so it defaults to 200 so that there is no return value problem + global response embedding_option = settings.embedding_type if embedding_option == "openai": settings.openai_api_key = arg1 @@ -197,10 +192,16 @@ def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> RAGResponse: headers = {"Authorization": f"Bearer {arg1}"} response = test_api_connection(test_url, headers=headers, origin_call=origin_call) elif embedding_option == "qianfan_wenxin": - settings.qianfan_access_token = arg1 - settings.qianfan_embed_url = arg2 - # TODO: add test connection - response = RAGResponse(status_code=200) + settings.qianfan_api_key = arg1 + settings.qianfan_secret_key = arg2 + params = { + "grant_type": "client_credentials", + "client_id": arg1, + "client_secret": arg2 + } + status_code = test_api_connection("https://aip.baidubce.com/oauth/2.0/token", "POST", params=params, + origin_call=origin_call) + log.debug("####") elif embedding_option == "ollama": settings.ollama_host = arg1 settings.ollama_port = int(arg2) @@ -323,7 +324,7 @@ def llm_settings(llm_type): gr.Markdown("3. 
Set up the Embedding.") embedding_dropdown = gr.Dropdown( - choices=["openai", "ollama", "qianfan_wenxin"], value=settings.embedding_type, label="Embedding" + choices=["openai", "qianfan_wenxin", "ollama"], value=settings.embedding_type, label="Embedding" ) @gr.render(inputs=[embedding_dropdown]) From aa47e92fb8fdfa99100f85658c20a17c74c85c30 Mon Sep 17 00:00:00 2001 From: imbajin Date: Sun, 18 Aug 2024 23:11:46 +0800 Subject: [PATCH 30/31] refact: support test wenxin/ollama conn --- .../api/exceptions/rag_exceptions.py | 11 +- .../hugegraph_llm/api/models/rag_requests.py | 2 +- .../hugegraph_llm/api/models/rag_response.py | 3 +- .../src/hugegraph_llm/api/rag_api.py | 22 +-- .../src/hugegraph_llm/demo/rag_web_demo.py | 143 ++++++++---------- 5 files changed, 84 insertions(+), 97 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py b/hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py index 40bdb561..24ef7c1a 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py +++ b/hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py @@ -18,17 +18,20 @@ from fastapi import HTTPException from hugegraph_llm.api.models.rag_response import RAGResponse -class UnsupportedMethodException(HTTPException): + +class ExternalException(HTTPException): def __init__(self): - super().__init__(status_code=400, detail="Connection failed with error code: -1") + super().__init__(status_code=400, detail="Connect failed with error code -1, please check the input.") + class ConnectionFailedException(HTTPException): def __init__(self, status_code: int, message: str): super().__init__(status_code=status_code, detail=message) + def generate_response(response: RAGResponse) -> dict: if response.status_code == -1: - raise UnsupportedMethodException() + raise ExternalException() elif not (200 <= response.status_code < 300): raise ConnectionFailedException(response.status_code, response.message) - return {"message": "Connection successful. Configured finished."} \ No newline at end of file + return {"message": "Connection successful. Configured finished."} diff --git a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py index fd77a222..d12a1b80 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py +++ b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py @@ -49,4 +49,4 @@ class LLMConfigRequest(BaseModel): secret_key: str = None # ollama-only properties host: str = None - port: str = None \ No newline at end of file + port: str = None diff --git a/hugegraph-llm/src/hugegraph_llm/api/models/rag_response.py b/hugegraph-llm/src/hugegraph_llm/api/models/rag_response.py index 720777c2..fe139eeb 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/models/rag_response.py +++ b/hugegraph-llm/src/hugegraph_llm/api/models/rag_response.py @@ -17,6 +17,7 @@ from pydantic import BaseModel + class RAGResponse(BaseModel): status_code: int = -1 - message: str = "" \ No newline at end of file + message: str = "" diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py index fa7c93ec..a9c834c1 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py @@ -16,6 +16,8 @@ # under the License. 
from fastapi import FastAPI, status + +from hugegraph_llm.api.models.rag_response import RAGResponse from hugegraph_llm.config import settings from hugegraph_llm.api.models.rag_requests import RAGRequest, GraphConfigRequest, LLMConfigRequest from hugegraph_llm.api.exceptions.rag_exceptions import generate_response @@ -34,31 +36,31 @@ def rag_answer_api(req: RAGRequest): @app.post("/config/graph", status_code=status.HTTP_201_CREATED) def graph_config_api(req: GraphConfigRequest): # Accept status code - response = apply_graph_conf(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") - return generate_response(response) + res = apply_graph_conf(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") + return generate_response(RAGResponse(status_code=res, message="Missing Value")) @app.post("/config/llm", status_code=status.HTTP_201_CREATED) def llm_config_api(req: LLMConfigRequest): settings.llm_type = req.llm_type if req.llm_type == "openai": - response = apply_llm_conf( + res = apply_llm_conf( req.api_key, req.api_base, req.language_model, req.max_tokens, origin_call="http" ) elif req.llm_type == "qianfan_wenxin": - response = apply_llm_conf(req.api_key, req.secret_key, req.language_model, None, origin_call="http") + res = apply_llm_conf(req.api_key, req.secret_key, req.language_model, None, origin_call="http") else: - response = apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http") - return generate_response(response) + res = apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http") + return generate_response(RAGResponse(status_code=res, message="Missing Value")) @app.post("/config/embedding", status_code=status.HTTP_201_CREATED) def embedding_config_api(req: LLMConfigRequest): settings.embedding_type = req.llm_type if req.llm_type == "openai": - response = apply_embedding_conf(req.api_key, req.api_base, req.language_model, origin_call="http") + res = apply_embedding_conf(req.api_key, req.api_base, req.language_model, origin_call="http") elif req.llm_type == "qianfan_wenxin": - response = apply_embedding_conf(req.api_key, req.api_base, None, origin_call="http") + res = apply_embedding_conf(req.api_key, req.api_base, None, origin_call="http") else: - response = apply_embedding_conf(req.host, req.port, req.language_model, origin_call="http") - return generate_response(response) + res = apply_embedding_conf(req.host, req.port, req.language_model, origin_call="http") + return generate_response(RAGResponse(status_code=res, message="Missing Value")) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py index 3624d83e..4cf9637b 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py @@ -16,34 +16,33 @@ # under the License. 
-import json import argparse +import json import os -import requests -import uvicorn import docx import gradio as gr +import requests +import uvicorn from fastapi import FastAPI from requests.auth import HTTPBasicAuth -from hugegraph_llm.models.llms.init_llm import LLMs +from hugegraph_llm.api.rag_api import rag_http_api +from hugegraph_llm.config import settings, resource_path +from hugegraph_llm.enums.build_mode import BuildMode from hugegraph_llm.models.embeddings.init_embedding import Embeddings +from hugegraph_llm.models.llms.init_llm import LLMs from hugegraph_llm.operators.graph_rag_task import GraphRAG from hugegraph_llm.operators.kg_construction_task import KgBuilder -from hugegraph_llm.config import settings, resource_path from hugegraph_llm.operators.llm_op.property_graph_extract import SCHEMA_EXAMPLE_PROMPT +from hugegraph_llm.utils.hugegraph_utils import get_hg_client from hugegraph_llm.utils.hugegraph_utils import init_hg_test_data, run_gremlin_query, clean_hg_data from hugegraph_llm.utils.log import log -from hugegraph_llm.utils.hugegraph_utils import get_hg_client from hugegraph_llm.utils.vector_index_utils import clean_vector_index -from hugegraph_llm.api.rag_api import rag_http_api -from hugegraph_llm.api.models.rag_response import RAGResponse -from hugegraph_llm.enums.build_mode import BuildMode def rag_answer( - text: str, raw_answer: bool, vector_only_answer: bool, graph_only_answer: bool, graph_vector_answer: bool + text: str, raw_answer: bool, vector_only_answer: bool, graph_only_answer: bool, graph_vector_answer: bool ) -> tuple: vector_search = vector_only_answer or graph_vector_answer graph_search = graph_only_answer or graph_vector_answer @@ -134,56 +133,52 @@ def build_kg(file, schema, example_prompt, build_mode) -> str: # pylint: disabl def test_api_connection(url, method="GET", - headers=None, params=None, body=None, auth=None, origin_call=None) -> RAGResponse: + headers=None, params=None, body=None, auth=None, origin_call=None) -> int: # TODO: use fastapi.request / starlette instead? - response = None - global return_dict - log.debug("Request URL: %s", url) try: if method.upper() == "GET": - response = requests.get(url, headers=headers, params=params, timeout=5, auth=auth) + resp = requests.get(url, headers=headers, params=params, timeout=5, auth=auth) elif method.upper() == "POST": - response = requests.post(url, headers=headers, params=params, json=body, timeout=5, auth=auth) + resp = requests.post(url, headers=headers, params=params, json=body, timeout=5, auth=auth) else: - raise gr.Error("Unsupported method") + raise ValueError("Unsupported HTTP method, please use GET/POST instead") except requests.exceptions.RequestException as e: - message = f"Connection failed: {e}" - log.error(message) + msg = f"Connection failed: {e}" + log.error(msg) if origin_call is None: - raise gr.Error(message) - return return_dict - - if 200 <= response.status_code < 300: - message = "Connection successful. Configured finished." 
- log.info(message) - gr.Info(message) - return_dict.status_code = response.status_code - return_dict.message = message - return return_dict + raise gr.Error(msg) + return -1 # Error code + + if 200 <= resp.status_code < 300: + msg = "Test connection successful~" + log.info(msg) + gr.Info(msg) else: - message = f"Connection failed with status code: {response.status_code}" - log.error(message) - try: - # TODO: Feedback on graph name errors alone, but needs to be modified - if origin_call is None: - raise gr.Error(json.loads(response.text)["message"]) - return_dict.status_code = response.status_code - return_dict.message = json.loads(response.text)["message"] - return return_dict + msg = f"Connection failed with status code: {resp.status_code}, error: {resp.text}" + log.error(msg) # TODO: Only the message returned by rag can be processed, and the other return values can't be processed - except json.JSONDecodeError as e: - if origin_call is None: - raise gr.Error(message) - return_dict.status_code = response.status_code - return_dict.message = message - return return_dict - - -def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> RAGResponse: - # TODO:Because of ollama, the qianfan_wenxin model is missing the test connect procedure, - # so it defaults to 200 so that there is no return value problem - global response + if origin_call is None: + raise gr.Error(json.loads(resp.text).get("message", msg)) + return resp.status_code + + +def config_qianfan_model(arg1, arg2, arg3 = None, origin_call=None) -> int: + settings.qianfan_api_key = arg1 + settings.qianfan_secret_key = arg2 + settings.qianfan_language_model = arg3 + params = { + "grant_type": "client_credentials", + "client_id": arg1, + "client_secret": arg2 + } + status_code = test_api_connection("https://aip.baidubce.com/oauth/2.0/token", "POST", params=params, + origin_call=origin_call) + return status_code + + +def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> int: + status_code = -1 embedding_option = settings.embedding_type if embedding_option == "openai": settings.openai_api_key = arg1 @@ -191,30 +186,22 @@ def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> RAGResponse: settings.openai_embedding_model = arg3 test_url = settings.openai_api_base + "/models" headers = {"Authorization": f"Bearer {arg1}"} - response = test_api_connection(test_url, headers=headers, origin_call=origin_call) + status_code = test_api_connection(test_url, headers=headers, origin_call=origin_call) elif embedding_option == "qianfan_wenxin": - settings.qianfan_api_key = arg1 - settings.qianfan_secret_key = arg2 - params = { - "grant_type": "client_credentials", - "client_id": arg1, - "client_secret": arg2 - } - status_code = test_api_connection("https://aip.baidubce.com/oauth/2.0/token", "POST", params=params, - origin_call=origin_call) - log.debug("####") + status_code = config_qianfan_model(arg1, arg2, origin_call=origin_call) + settings.qianfan_embedding_model = arg3 elif embedding_option == "ollama": settings.ollama_host = arg1 settings.ollama_port = int(arg2) settings.ollama_embedding_model = arg3 - # TODO: add test connection - response = RAGResponse(status_code=200) + # TODO: right way to test ollama conn? 
+ status_code = test_api_connection(f"http://{arg1}:{arg2}/status", origin_call=origin_call) settings.update_env() gr.Info("Configured!") - return response + return status_code -def apply_graph_config(ip, port, name, user, pwd, gs, origin_call=None) -> RAGResponse: +def apply_graph_config(ip, port, name, user, pwd, gs, origin_call=None) -> int: settings.graph_ip = ip settings.graph_port = int(port) settings.graph_name = name @@ -235,10 +222,9 @@ def apply_graph_config(ip, port, name, user, pwd, gs, origin_call=None) -> RAGRe # Different llm models have different parameters, # so no meaningful argument names are given here -def apply_llm_config(arg1, arg2, arg3, arg4, origin_call=None) -> RAGResponse: +def apply_llm_config(arg1, arg2, arg3, arg4, origin_call=None) -> int: llm_option = settings.llm_type - # Because of ollama, the qianfan_wenxin model is missing the test connect procedure, - # so it defaults to 200 so that there is no return value problem + status_code = -1 if llm_option == "openai": settings.openai_api_key = arg1 settings.openai_api_base = arg2 @@ -246,27 +232,22 @@ def apply_llm_config(arg1, arg2, arg3, arg4, origin_call=None) -> RAGResponse: settings.openai_max_tokens = int(arg4) test_url = settings.openai_api_base + "/models" headers = {"Authorization": f"Bearer {arg1}"} - response = test_api_connection(test_url, headers=headers, origin_call=origin_call) + status_code = test_api_connection(test_url, headers=headers, origin_call=origin_call) elif llm_option == "qianfan_wenxin": - settings.qianfan_api_key = arg1 - settings.qianfan_secret_key = arg2 - settings.qianfan_language_model = arg3 - # TODO: add test connection - response = RAGResponse(status_code=200, message="") - # test_url = "https://aip.baidubce.com/oauth/2.0/token" # POST + status_code = config_qianfan_model(arg1, arg2, arg3, origin_call) elif llm_option == "ollama": settings.ollama_host = arg1 settings.ollama_port = int(arg2) settings.ollama_language_model = arg3 - # TODO: add test connection - response = RAGResponse(status_code=200, message="") + # TODO: right way to test ollama conn? + status_code = test_api_connection(f"http://{arg1}:{arg2}/status", origin_call=origin_call) gr.Info("Configured!") settings.update_env() - return response + return status_code def init_rag_ui() -> gr.Interface: - with gr.Blocks() as hugegraph_llm: + with gr.Blocks() as hugegraph_llm_ui: gr.Markdown( """# HugeGraph LLM RAG Demo 1. Set up the HugeGraph server.""" @@ -325,7 +306,7 @@ def llm_settings(llm_type): gr.Markdown("3. 
Set up the Embedding.") embedding_dropdown = gr.Dropdown( - choices=["openai", "ollama", "qianfan_wenxin"], value=settings.embedding_type, label="Embedding" + choices=["openai", "qianfan_wenxin", "ollama"], value=settings.embedding_type, label="Embedding" ) @gr.render(inputs=[embedding_dropdown]) @@ -470,7 +451,7 @@ def embedding_settings(embedding_type): out = gr.Textbox(label="Output", show_copy_button=True) btn = gr.Button("(BETA) Init HugeGraph test data (🚧WIP)") btn.click(fn=init_hg_test_data, inputs=inp, outputs=out) # pylint: disable=no-member - return hugegraph_llm + return hugegraph_llm_ui if __name__ == "__main__": From e097d0fd8d51db1f783b84de70c4404a0bfa81b8 Mon Sep 17 00:00:00 2001 From: imbajin Date: Sun, 18 Aug 2024 23:26:28 +0800 Subject: [PATCH 31/31] tiny fix --- hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py | 10 +++++----- hugegraph-llm/src/hugegraph_llm/enums/build_mode.py | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py index 4cf9637b..756cb1cc 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py @@ -163,7 +163,7 @@ def test_api_connection(url, method="GET", return resp.status_code -def config_qianfan_model(arg1, arg2, arg3 = None, origin_call=None) -> int: +def config_qianfan_model(arg1, arg2, arg3=None, origin_call=None) -> int: settings.qianfan_api_key = arg1 settings.qianfan_secret_key = arg2 settings.qianfan_language_model = arg3 @@ -359,7 +359,7 @@ def embedding_settings(embedding_type): """ ) - SCHEMA = """{ + schema = """{ "vertexlabels": [ { "id":1, @@ -396,7 +396,7 @@ def embedding_settings(embedding_type): with gr.Row(): input_file = gr.File(value=os.path.join(resource_path, "demo", "test.txt"), label="Document") - input_schema = gr.Textbox(value=SCHEMA, label="Schema") + input_schema = gr.Textbox(value=schema, label="Schema") info_extract_template = gr.Textbox(value=SCHEMA_EXAMPLE_PROMPT, label="Info extract head") with gr.Column(): mode = gr.Radio( @@ -441,10 +441,10 @@ def embedding_settings(embedding_type): with gr.Row(): with gr.Column(): inp = gr.Textbox(value="g.V().limit(10)", label="Gremlin query", show_copy_button=True) - format = gr.Checkbox(label="Format JSON", value=True) + fmt = gr.Checkbox(label="Format JSON", value=True) out = gr.Textbox(label="Output", show_copy_button=True) btn = gr.Button("Run gremlin query on HugeGraph") - btn.click(fn=run_gremlin_query, inputs=[inp, format], outputs=out) # pylint: disable=no-member + btn.click(fn=run_gremlin_query, inputs=[inp, fmt], outputs=out) # pylint: disable=no-member with gr.Row(): inp = [] diff --git a/hugegraph-llm/src/hugegraph_llm/enums/build_mode.py b/hugegraph-llm/src/hugegraph_llm/enums/build_mode.py index 1dd6eea5..50db4c82 100644 --- a/hugegraph-llm/src/hugegraph_llm/enums/build_mode.py +++ b/hugegraph-llm/src/hugegraph_llm/enums/build_mode.py @@ -18,6 +18,7 @@ from enum import Enum + class BuildMode(Enum): REBUILD_VECTOR = "Rebuild Vector" TEST_MODE = "Test Mode"