From 62b51427aca8cc3dc2e7a005be2d42b29b41fb99 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 4 Jul 2023 15:32:35 +0800 Subject: [PATCH 1/6] add HugeGraphQAChain support gremlin graph language --- .../additional/graph_hugegraph_qa.ipynb | 281 ++++++++++++++++++ langchain/chains/__init__.py | 2 + langchain/chains/graph_qa/hugegraph.py | 91 ++++++ langchain/chains/graph_qa/prompts.py | 8 + langchain/graphs/__init__.py | 3 +- langchain/graphs/huge_graph.py | 62 ++++ poetry.lock | 17 ++ pyproject.toml | 1 + tests/integration_tests/test_hugegraph.py | 58 ++++ 9 files changed, 522 insertions(+), 1 deletion(-) create mode 100644 docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb create mode 100644 langchain/chains/graph_qa/hugegraph.py create mode 100644 langchain/graphs/huge_graph.py create mode 100644 tests/integration_tests/test_hugegraph.py diff --git a/docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb b/docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb new file mode 100644 index 0000000000000..1dd99d3f5b1e9 --- /dev/null +++ b/docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c94240f5", + "metadata": {}, + "source": [ + "# Graph DB QA chain\n", + "\n", + "This notebook shows how to use LLMs to provide a natural language interface to [HugeGraph](https://hugegraph.apache.org/cn/) database." + ] + }, + { + "cell_type": "markdown", + "source": [ + "You will need to have a running HugeGraph instance.\n", + "You can run a local docker container by running the executing the following script:\n", + "\n", + "```\n", + "docker run \\\n", + " --name=graph \\\n", + " -itd \\\n", + " -p 8080:8080 \\\n", + " hugegraph/hugegraph\n", + "```\n", + "\n", + "If we want to connect HugeGraph in the application, we need to install python sdk:\n", + "\n", + "```\n", + "pip3 install hugegraph-python\n", + "```" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "If you are using the docker container, you need to wait a couple of second for the database to start, and then we need create schema and write graph data for the database." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "from hugegraph.connection import PyHugeGraph\n", + "client = PyHugeGraph(\"localhost\", \"8080\", user=\"admin\", pwd=\"admin\", graph=\"langchain\")" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "First, we create the schema for a simple movie database:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "\"\"\"schema\"\"\"\n", + "schema = client.schema()\n", + "schema.propertyKey(\"name\").asText().ifNotExist().create()\n", + "schema.propertyKey(\"birthDate\").asText().ifNotExist().create()\n", + "schema.vertexLabel(\"Person\").properties(\"name\", \"birthDate\").usePrimaryKeyId().primaryKeys(\n", + " \"name\").ifNotExist().create()\n", + "schema.vertexLabel(\"Movie\").properties(\"name\").usePrimaryKeyId().primaryKeys(\"name\").ifNotExist().create()\n", + "schema.edgeLabel(\"ActedIn\").sourceLabel(\"Person\").targetLabel(\"Movie\").ifNotExist().create()" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "Then we can insert some data." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "\"\"\"graph\"\"\"\n", + "g = client.graph()\n", + "g.addVertex(\"Person\", {\"name\": \"Al Pacino\", \"birthDate\": \"1940-04-25\"})\n", + "g.addVertex(\"Person\", {\"name\": \"Robert De Niro\", \"birthDate\": \"1943-08-17\"})\n", + "g.addVertex(\"Movie\", {\"name\": \"The Godfather\"})\n", + "g.addVertex(\"Movie\", {\"name\": \"The Godfather Part II\"})\n", + "g.addVertex(\"Movie\", {\"name\": \"The Godfather Coda The Death of Michael Corleone\"})\n", + "\n", + "g.addEdge(\"ActedIn\", \"12:Al Pacino\", \"13:The Godfather\", {})\n", + "g.addEdge(\"ActedIn\", \"12:Al Pacino\", \"13:The Godfather Part II\", {})\n", + "g.addEdge(\"ActedIn\", \"12:Al Pacino\", \"13:The Godfather Coda The Death of Michael Corleone\", {})\n", + "g.addEdge(\"ActedIn\", \"12:Robert De Niro\", \"13:The Godfather Part II\", {})" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Creating `HugeGraphQAChain`\n", + "\n", + "We can now create the `HugeGraph` and `HugeGraphQAChain`. To create the `HugeGraph` we simply need to pass the database object to the `HugeGraph` constructor." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "62812aad", + "metadata": { + "is_executing": true + }, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.chains import HugeGraphQAChain\n", + "from langchain.graphs import HugeGraph" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0928915d", + "metadata": {}, + "outputs": [], + "source": [ + "graph = HugeGraph(\n", + " username=\"default\",\n", + " password=\"default\",\n", + " address=\"address\",\n", + " port=8081,\n", + " graph=\"graph\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "58c1a8ea", + "metadata": {}, + "source": [ + "## Refresh graph schema information\n", + "\n", + "If the schema of database changes, you can refresh the schema information needed to generate Gremlin statements." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4e3de44f", + "metadata": {}, + "outputs": [], + "source": [ + "# graph.refresh_schema()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1fe76ccd", + "metadata": { + "ExecuteTime": {} + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Node properties: [name: Movie, primary_keys: ['name'], properties: ['name'], name: Person, primary_keys: ['name'], properties: ['name', 'birthDate']]\n", + "Edge properties: [name: ActedIn, properties: []]\n", + "Relationships: ['Person--ActedIn-->Movie']\n", + "\n" + ] + } + ], + "source": [ + "print(graph.get_schema)" + ] + }, + { + "cell_type": "markdown", + "id": "68a3c677", + "metadata": {}, + "source": [ + "## Querying the graph\n", + "\n", + "We can now use the graph Gremlin QA chain to ask question of the graph" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7476ce98", + "metadata": {}, + "outputs": [], + "source": [ + "chain = HugeGraphQAChain.from_llm(\n", + " ChatOpenAI(temperature=0), graph=graph, verbose=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ef8ee27b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001B[1m> Entering new GraphCypherQAChain chain...\u001B[0m\n", + "Generated Cypher:\n", + "\u001B[32;1m\u001B[1;3mg.V().has(\"name\", \"The Godfather\").in(\"ActedIn\")", + "\u001B[0m\n", + "Full Context:\n", + "\u001B[32;1m\u001B[1;3m[{'id':'12:Al Pacino','label':'Person','properties':{'name':'Al Pacino','birthDate':'1940-04-25'}}]\u001B[0m\n", + "\n", + "\u001B[1m> Finished chain.\u001B[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'Al Pacino played in The Godfather.''" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.run(\"Who played in The Godfather?\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/langchain/chains/__init__.py b/langchain/chains/__init__.py index 635447559578e..e5f8b7b23b2dd 100644 --- a/langchain/chains/__init__.py +++ b/langchain/chains/__init__.py @@ -15,6 +15,7 @@ from langchain.chains.flare.base import FlareChain from langchain.chains.graph_qa.base import GraphQAChain from langchain.chains.graph_qa.cypher import GraphCypherQAChain +from langchain.chains.graph_qa.hugegraph import HugeGraphQAChain from langchain.chains.graph_qa.kuzu import KuzuQAChain from langchain.chains.graph_qa.nebulagraph import NebulaGraphQAChain from langchain.chains.hyde.base import HypotheticalDocumentEmbedder @@ -69,6 +70,7 @@ "GraphQAChain", "HypotheticalDocumentEmbedder", "KuzuQAChain", + "HugeGraphQAChain", "LLMBashChain", "LLMChain", "LLMCheckerChain", diff --git a/langchain/chains/graph_qa/hugegraph.py b/langchain/chains/graph_qa/hugegraph.py new file mode 100644 index 0000000000000..091793facc489 --- /dev/null +++ b/langchain/chains/graph_qa/hugegraph.py @@ -0,0 +1,91 @@ +"""Question answering over a graph.""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from pydantic import Field + +from langchain.base_language import BaseLanguageModel +from langchain.callbacks.manager import CallbackManagerForChainRun +from langchain.chains.base import Chain +from langchain.chains.graph_qa.prompts import CYPHER_QA_PROMPT, GREMLIN_GENERATION_PROMPT +from langchain.chains.llm import LLMChain +from langchain.graphs.huge_graph import HugeGraph +from langchain.schema import BasePromptTemplate + + +class HugeGraphQAChain(Chain): + """Chain for question-answering against a graph by generating gremlin statements.""" + + graph: HugeGraph = Field(exclude=True) + gremlin_generation_chain: LLMChain + qa_chain: LLMChain + input_key: str = "query" #: :meta private: + output_key: str = "result" #: :meta private: + + @property + def input_keys(self) -> List[str]: + """Return the input keys. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return the output keys. + + :meta private: + """ + _output_keys = [self.output_key] + return _output_keys + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + *, + qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT, + gremlin_prompt: BasePromptTemplate = GREMLIN_GENERATION_PROMPT, + **kwargs: Any, + ) -> HugeGraphQAChain: + """Initialize from LLM.""" + qa_chain = LLMChain(llm=llm, prompt=qa_prompt) + gremlin_generation_chain = LLMChain(llm=llm, prompt=gremlin_prompt) + + return cls( + qa_chain=qa_chain, + gremlin_generation_chain=gremlin_generation_chain, + **kwargs, + ) + + def _call( + self, + inputs: Dict[str, Any], + run_manager: Optional[CallbackManagerForChainRun] = None, + ) -> Dict[str, str]: + """Generate gremlin statement, use it to look up in db and answer question.""" + _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() + callbacks = _run_manager.get_child() + question = inputs[self.input_key] + + generated_gremlin = self.gremlin_generation_chain.run( + {"question": question, "schema": self.graph.get_schema}, callbacks=callbacks + ) + + _run_manager.on_text("Generated gremlin:", end="\n", verbose=self.verbose) + _run_manager.on_text( + generated_gremlin, color="green", end="\n", verbose=self.verbose + ) + context = self.graph.query(generated_gremlin) + + _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose) + _run_manager.on_text( + str(context), color="green", end="\n", verbose=self.verbose + ) + + result = self.qa_chain( + {"question": question, "context": context}, + callbacks=callbacks, + ) + return {self.output_key: result[self.qa_chain.output_key]} diff --git a/langchain/chains/graph_qa/prompts.py b/langchain/chains/graph_qa/prompts.py index 1ca588ce6790b..fc95f702875d7 100644 --- a/langchain/chains/graph_qa/prompts.py +++ b/langchain/chains/graph_qa/prompts.py @@ -90,6 +90,14 @@ input_variables=["schema", "question"], template=KUZU_GENERATION_TEMPLATE ) +GREMLIN_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace( + "Cypher", "Gremlin" +) + +GREMLIN_GENERATION_PROMPT = PromptTemplate( + input_variables=["schema", "question"], template=GREMLIN_GENERATION_TEMPLATE +) + CYPHER_QA_TEMPLATE = """You are an assistant that helps to form nice and human understandable answers. The information part contains the provided information that you must use to construct an answer. The provided information is authorative, you must never doubt it or try to use your internal knowledge to correct it. diff --git a/langchain/graphs/__init__.py b/langchain/graphs/__init__.py index 23c93b4662bf6..df1fd9f1d6974 100644 --- a/langchain/graphs/__init__.py +++ b/langchain/graphs/__init__.py @@ -1,7 +1,8 @@ """Graph implementations.""" +from langchain.graphs.huge_graph import HugeGraph from langchain.graphs.kuzu_graph import KuzuGraph from langchain.graphs.nebula_graph import NebulaGraph from langchain.graphs.neo4j_graph import Neo4jGraph from langchain.graphs.networkx_graph import NetworkxEntityGraph -__all__ = ["NetworkxEntityGraph", "Neo4jGraph", "NebulaGraph", "KuzuGraph"] +__all__ = ["NetworkxEntityGraph", "Neo4jGraph", "NebulaGraph", "KuzuGraph", "HugeGraph"] diff --git a/langchain/graphs/huge_graph.py b/langchain/graphs/huge_graph.py new file mode 100644 index 0000000000000..ea78f413cd301 --- /dev/null +++ b/langchain/graphs/huge_graph.py @@ -0,0 +1,62 @@ +import logging +from string import Template +from typing import Any, Dict, List + + +class HugeGraph: + """HugeGraph wrapper for graph operations""" + + def __init__( + self, + username: str = "default", + password: str = "default", + address: str = "127.0.0.1", + port: int = 8081, + graph: str = "hugegraph" + ) -> None: + """Create a new HugeGraph wrapper instance.""" + try: + from hugegraph.connection import PyHugeGraph + except ImportError: + raise ValueError( + "Please install HugeGraph Python client first: " + "`pip3 install hugegraph-python`" + ) + + self.username = username + self.password = password + self.address = address + self.port = port + self.graph = graph + self.client = PyHugeGraph(address, port, user=username, pwd=password, graph=graph) + self.schema = "" + # Set schema + try: + self.refresh_schema() + except Exception as e: + raise ValueError(f"Could not refresh schema. Error: {e}") + + @property + def get_schema(self) -> str: + """Returns the schema of the HugeGraph database""" + return self.schema + + def refresh_schema(self) -> None: + """ + Refreshes the HugeGraph schema information. + """ + schema = self.client.schema() + vertex_schema = schema.getVertexLabels() + edge_schema = schema.getEdgeLabels() + relationships = schema.getRelations() + + self.schema = ( + f"Node properties: {vertex_schema}\n" + f"Edge properties: {edge_schema}\n" + f"Relationships: {relationships}\n" + ) + + def query(self, query: str) -> List[Dict[str, Any]]: + g = self.client.gremlin() + res = g.exec(query) + return res["data"] diff --git a/poetry.lock b/poetry.lock index 95ed67c99bf44..ae98326c355f9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3506,6 +3506,23 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +[[package]] +name = "hugegraph-python" +version = "1.0.0.12" +description = "Python client for HugeGraph" +optional = true +python-versions = "*" +files = [ + {file = "hugegraph-python-1.0.0.12.tar.gz", hash = "sha256:06b2dded70c4f4570083f8b6e3a9edfebcf5ac4f07300727afad72389917ab85"}, + {file = "hugegraph_python-1.0.0.12-py3-none-any.whl", hash = "sha256:69fe20edbe1a392d16afc74df5c94b3b96bc02c848e9ab5b5f18c112a9bc3ebe"}, +] + +[package.dependencies] +decorator = "5.1.1" +Requests = "2.31.0" +setuptools = "67.6.1" +urllib3 = "2.0.3" + [[package]] name = "huggingface-hub" version = "0.15.1" diff --git a/pyproject.toml b/pyproject.toml index a34e56a9d67a8..fa62e75cb20d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,6 +114,7 @@ openllm = {version = ">=0.1.19", optional = true} streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"} psychicapi = {version = "^0.8.0", optional = true} cassio = {version = "^0.0.7", optional = true} +hugegraph-python = {version = "^1.0.0.12", optional = true} [tool.poetry.group.docs.dependencies] autodoc_pydantic = "^1.8.0" diff --git a/tests/integration_tests/test_hugegraph.py b/tests/integration_tests/test_hugegraph.py new file mode 100644 index 0000000000000..4750b10741657 --- /dev/null +++ b/tests/integration_tests/test_hugegraph.py @@ -0,0 +1,58 @@ +import unittest +from typing import Any +from unittest.mock import MagicMock, patch + +from langchain.graphs import HugeGraph + + +class TestHugeGraph(unittest.TestCase): + def setUp(self) -> None: + self.username = "test_user" + self.password = "test_password" + self.address = "test_address" + self.graph = "test_hugegraph" + self.port = 1234 + self.session_pool_size = 10 + + @patch("hugegraph.connection.PyHugeGraph") + def test_init(self, mock_client: Any) -> None: + mock_client.return_value = MagicMock() + huge_graph = HugeGraph( + self.username, + self.password, + self.address, + self.port, + self.graph + ) + self.assertEqual(huge_graph.username, self.username) + self.assertEqual(huge_graph.password, self.password) + self.assertEqual(huge_graph.address, self.address) + self.assertEqual(huge_graph.port, self.port) + self.assertEqual(huge_graph.graph, self.graph) + + @patch("hugegraph.connection.PyHugeGraph") + def test_execute(self, mock_client: Any) -> None: + mock_client.return_value = MagicMock() + huge_graph = HugeGraph( + self.username, + self.password, + self.address, + self.port, + self.graph + ) + query = "g.V().limit(10)" + result = huge_graph.query(query) + self.assertIsInstance(result, MagicMock) + + @patch("hugegraph.connection.PyHugeGraph") + def test_refresh_schema(self, mock_client: Any) -> None: + mock_client.return_value = MagicMock() + huge_graph = HugeGraph( + self.username, + self.password, + self.address, + self.port, + self.graph + ) + huge_graph.refresh_schema() + self.assertNotEqual(huge_graph.get_schema, "") From e398f2794fc0b391e675d9691b560ccab533179d Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 4 Jul 2023 16:17:01 +0800 Subject: [PATCH 2/6] fmt --- .../chains/additional/graph_hugegraph_qa.ipynb | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb b/docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb index 1dd99d3f5b1e9..8f3e9171599c6 100644 --- a/docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb +++ b/docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb @@ -2,10 +2,9 @@ "cells": [ { "cell_type": "markdown", - "id": "c94240f5", "metadata": {}, "source": [ - "# Graph DB QA chain\n", + "# HugeGraph QA Chain\n", "\n", "This notebook shows how to use LLMs to provide a natural language interface to [HugeGraph](https://hugegraph.apache.org/cn/) database." ] @@ -127,7 +126,6 @@ { "cell_type": "code", "execution_count": 2, - "id": "62812aad", "metadata": { "is_executing": true }, @@ -141,7 +139,6 @@ { "cell_type": "code", "execution_count": 2, - "id": "0928915d", "metadata": {}, "outputs": [], "source": [ @@ -156,7 +153,6 @@ }, { "cell_type": "markdown", - "id": "58c1a8ea", "metadata": {}, "source": [ "## Refresh graph schema information\n", @@ -167,7 +163,6 @@ { "cell_type": "code", "execution_count": 4, - "id": "4e3de44f", "metadata": {}, "outputs": [], "source": [ @@ -177,7 +172,6 @@ { "cell_type": "code", "execution_count": 2, - "id": "1fe76ccd", "metadata": { "ExecuteTime": {} }, @@ -199,7 +193,6 @@ }, { "cell_type": "markdown", - "id": "68a3c677", "metadata": {}, "source": [ "## Querying the graph\n", @@ -210,7 +203,6 @@ { "cell_type": "code", "execution_count": 6, - "id": "7476ce98", "metadata": {}, "outputs": [], "source": [ @@ -222,7 +214,6 @@ { "cell_type": "code", "execution_count": 7, - "id": "ef8ee27b", "metadata": {}, "outputs": [ { @@ -231,8 +222,8 @@ "text": [ "\n", "\n", - "\u001B[1m> Entering new GraphCypherQAChain chain...\u001B[0m\n", - "Generated Cypher:\n", + "\u001B[1m> Entering new HugeGraphQAChain chain...\u001B[0m\n", + "Generated Gremlin:\n", "\u001B[32;1m\u001B[1;3mg.V().has(\"name\", \"The Godfather\").in(\"ActedIn\")", "\u001B[0m\n", "Full Context:\n", From 3512425e7a026cd854664721dbd1804f1389d58c Mon Sep 17 00:00:00 2001 From: Bagatur Date: Tue, 4 Jul 2023 04:32:26 -0600 Subject: [PATCH 3/6] fmt --- langchain/chains/graph_qa/hugegraph.py | 5 ++++- langchain/chains/graph_qa/prompts.py | 4 +--- langchain/graphs/huge_graph.py | 8 ++++---- pyproject.toml | 1 + tests/integration_tests/test_hugegraph.py | 18 +++--------------- 5 files changed, 13 insertions(+), 23 deletions(-) diff --git a/langchain/chains/graph_qa/hugegraph.py b/langchain/chains/graph_qa/hugegraph.py index 091793facc489..3aa2e3cf8807c 100644 --- a/langchain/chains/graph_qa/hugegraph.py +++ b/langchain/chains/graph_qa/hugegraph.py @@ -8,7 +8,10 @@ from langchain.base_language import BaseLanguageModel from langchain.callbacks.manager import CallbackManagerForChainRun from langchain.chains.base import Chain -from langchain.chains.graph_qa.prompts import CYPHER_QA_PROMPT, GREMLIN_GENERATION_PROMPT +from langchain.chains.graph_qa.prompts import ( + CYPHER_QA_PROMPT, + GREMLIN_GENERATION_PROMPT, +) from langchain.chains.llm import LLMChain from langchain.graphs.huge_graph import HugeGraph from langchain.schema import BasePromptTemplate diff --git a/langchain/chains/graph_qa/prompts.py b/langchain/chains/graph_qa/prompts.py index fc95f702875d7..c3b49b214d549 100644 --- a/langchain/chains/graph_qa/prompts.py +++ b/langchain/chains/graph_qa/prompts.py @@ -90,9 +90,7 @@ input_variables=["schema", "question"], template=KUZU_GENERATION_TEMPLATE ) -GREMLIN_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace( - "Cypher", "Gremlin" -) +GREMLIN_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace("Cypher", "Gremlin") GREMLIN_GENERATION_PROMPT = PromptTemplate( input_variables=["schema", "question"], template=GREMLIN_GENERATION_TEMPLATE diff --git a/langchain/graphs/huge_graph.py b/langchain/graphs/huge_graph.py index ea78f413cd301..f6afc99af9d54 100644 --- a/langchain/graphs/huge_graph.py +++ b/langchain/graphs/huge_graph.py @@ -1,5 +1,3 @@ -import logging -from string import Template from typing import Any, Dict, List @@ -12,7 +10,7 @@ def __init__( password: str = "default", address: str = "127.0.0.1", port: int = 8081, - graph: str = "hugegraph" + graph: str = "hugegraph", ) -> None: """Create a new HugeGraph wrapper instance.""" try: @@ -28,7 +26,9 @@ def __init__( self.address = address self.port = port self.graph = graph - self.client = PyHugeGraph(address, port, user=username, pwd=password, graph=graph) + self.client = PyHugeGraph( + address, port, user=username, pwd=password, graph=graph + ) self.schema = "" # Set schema try: diff --git a/pyproject.toml b/pyproject.toml index fa62e75cb20d9..923140974cc42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -311,6 +311,7 @@ all = [ "awadb", "esprima", "octoai-sdk", + "hugegraph-python", ] # An extra used to be able to add extended testing. diff --git a/tests/integration_tests/test_hugegraph.py b/tests/integration_tests/test_hugegraph.py index 4750b10741657..23a3893c2a6d0 100644 --- a/tests/integration_tests/test_hugegraph.py +++ b/tests/integration_tests/test_hugegraph.py @@ -18,11 +18,7 @@ def setUp(self) -> None: def test_init(self, mock_client: Any) -> None: mock_client.return_value = MagicMock() huge_graph = HugeGraph( - self.username, - self.password, - self.address, - self.port, - self.graph + self.username, self.password, self.address, self.port, self.graph ) self.assertEqual(huge_graph.username, self.username) self.assertEqual(huge_graph.password, self.password) @@ -34,11 +30,7 @@ def test_init(self, mock_client: Any) -> None: def test_execute(self, mock_client: Any) -> None: mock_client.return_value = MagicMock() huge_graph = HugeGraph( - self.username, - self.password, - self.address, - self.port, - self.graph + self.username, self.password, self.address, self.port, self.graph ) query = "g.V().limit(10)" result = huge_graph.query(query) @@ -48,11 +40,7 @@ def test_execute(self, mock_client: Any) -> None: def test_refresh_schema(self, mock_client: Any) -> None: mock_client.return_value = MagicMock() huge_graph = HugeGraph( - self.username, - self.password, - self.address, - self.port, - self.graph + self.username, self.password, self.address, self.port, self.graph ) huge_graph.refresh_schema() self.assertNotEqual(huge_graph.get_schema, "") From c0def0cca5e08ac49ae9919f906f61148cdaa6ea Mon Sep 17 00:00:00 2001 From: Bagatur Date: Tue, 4 Jul 2023 04:36:59 -0600 Subject: [PATCH 4/6] cr --- langchain/chains/graph_qa/hugegraph.py | 2 +- langchain/graphs/__init__.py | 2 +- langchain/graphs/{huge_graph.py => hugegraph.py} | 0 tests/integration_tests/graphs/__init__.py | 0 tests/integration_tests/{ => graphs}/test_hugegraph.py | 0 5 files changed, 2 insertions(+), 2 deletions(-) rename langchain/graphs/{huge_graph.py => hugegraph.py} (100%) create mode 100644 tests/integration_tests/graphs/__init__.py rename tests/integration_tests/{ => graphs}/test_hugegraph.py (100%) diff --git a/langchain/chains/graph_qa/hugegraph.py b/langchain/chains/graph_qa/hugegraph.py index 3aa2e3cf8807c..a27da8e2c9c4d 100644 --- a/langchain/chains/graph_qa/hugegraph.py +++ b/langchain/chains/graph_qa/hugegraph.py @@ -13,7 +13,7 @@ GREMLIN_GENERATION_PROMPT, ) from langchain.chains.llm import LLMChain -from langchain.graphs.huge_graph import HugeGraph +from langchain.graphs.hugegraph import HugeGraph from langchain.schema import BasePromptTemplate diff --git a/langchain/graphs/__init__.py b/langchain/graphs/__init__.py index df1fd9f1d6974..437fe4a9d6318 100644 --- a/langchain/graphs/__init__.py +++ b/langchain/graphs/__init__.py @@ -1,5 +1,5 @@ """Graph implementations.""" -from langchain.graphs.huge_graph import HugeGraph +from langchain.graphs.hugegraph import HugeGraph from langchain.graphs.kuzu_graph import KuzuGraph from langchain.graphs.nebula_graph import NebulaGraph from langchain.graphs.neo4j_graph import Neo4jGraph diff --git a/langchain/graphs/huge_graph.py b/langchain/graphs/hugegraph.py similarity index 100% rename from langchain/graphs/huge_graph.py rename to langchain/graphs/hugegraph.py diff --git a/tests/integration_tests/graphs/__init__.py b/tests/integration_tests/graphs/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/integration_tests/test_hugegraph.py b/tests/integration_tests/graphs/test_hugegraph.py similarity index 100% rename from tests/integration_tests/test_hugegraph.py rename to tests/integration_tests/graphs/test_hugegraph.py From 522a9d6c9331763dede6e196e860752220fdf045 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Tue, 4 Jul 2023 09:53:45 -0600 Subject: [PATCH 5/6] update --- .../additional/graph_hugegraph_qa.ipynb | 164 +++++++++++------- 1 file changed, 97 insertions(+), 67 deletions(-) diff --git a/docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb b/docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb index 8f3e9171599c6..261df2708b84e 100644 --- a/docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb +++ b/docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "d2777010", "metadata": {}, "source": [ "# HugeGraph QA Chain\n", @@ -11,6 +12,8 @@ }, { "cell_type": "markdown", + "id": "f26dcbe4", + "metadata": {}, "source": [ "You will need to have a running HugeGraph instance.\n", "You can run a local docker container by running the executing the following script:\n", @@ -28,72 +31,88 @@ "```\n", "pip3 install hugegraph-python\n", "```" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "id": "d64a29f1", + "metadata": {}, "source": [ "If you are using the docker container, you need to wait a couple of second for the database to start, and then we need create schema and write graph data for the database." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 13, + "id": "e53ab93e", + "metadata": {}, "outputs": [], "source": [ "from hugegraph.connection import PyHugeGraph\n", - "client = PyHugeGraph(\"localhost\", \"8080\", user=\"admin\", pwd=\"admin\", graph=\"langchain\")" - ], - "metadata": { - "collapsed": false - } + "\n", + "client = PyHugeGraph(\"localhost\", \"8080\", user=\"admin\", pwd=\"admin\", graph=\"hugegraph\")" + ] }, { "cell_type": "markdown", + "id": "b7c3a50e", + "metadata": {}, "source": [ "First, we create the schema for a simple movie database:" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 2, - "outputs": [], + "execution_count": 4, + "id": "ef5372a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'create EdgeLabel success, Detail: \"b\\'{\"id\":1,\"name\":\"ActedIn\",\"source_label\":\"Person\",\"target_label\":\"Movie\",\"frequency\":\"SINGLE\",\"sort_keys\":[],\"nullable_keys\":[],\"index_labels\":[],\"properties\":[],\"status\":\"CREATED\",\"ttl\":0,\"enable_label_index\":true,\"user_data\":{\"~create_time\":\"2023-07-04 10:48:47.908\"}}\\'\"'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\"\"\"schema\"\"\"\n", "schema = client.schema()\n", "schema.propertyKey(\"name\").asText().ifNotExist().create()\n", "schema.propertyKey(\"birthDate\").asText().ifNotExist().create()\n", - "schema.vertexLabel(\"Person\").properties(\"name\", \"birthDate\").usePrimaryKeyId().primaryKeys(\n", - " \"name\").ifNotExist().create()\n", + "schema.vertexLabel(\"Person\").properties(\"name\", \"birthDate\").usePrimaryKeyId().primaryKeys(\"name\").ifNotExist().create()\n", "schema.vertexLabel(\"Movie\").properties(\"name\").usePrimaryKeyId().primaryKeys(\"name\").ifNotExist().create()\n", "schema.edgeLabel(\"ActedIn\").sourceLabel(\"Person\").targetLabel(\"Movie\").ifNotExist().create()" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "id": "016f7989", + "metadata": {}, "source": [ "Then we can insert some data." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 2, - "outputs": [], + "execution_count": 26, + "id": "b7f4c370", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1:Robert De Niro--ActedIn-->2:The Godfather Part II" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\"\"\"graph\"\"\"\n", "g = client.graph()\n", @@ -103,29 +122,26 @@ "g.addVertex(\"Movie\", {\"name\": \"The Godfather Part II\"})\n", "g.addVertex(\"Movie\", {\"name\": \"The Godfather Coda The Death of Michael Corleone\"})\n", "\n", - "g.addEdge(\"ActedIn\", \"12:Al Pacino\", \"13:The Godfather\", {})\n", - "g.addEdge(\"ActedIn\", \"12:Al Pacino\", \"13:The Godfather Part II\", {})\n", - "g.addEdge(\"ActedIn\", \"12:Al Pacino\", \"13:The Godfather Coda The Death of Michael Corleone\", {})\n", - "g.addEdge(\"ActedIn\", \"12:Robert De Niro\", \"13:The Godfather Part II\", {})" - ], - "metadata": { - "collapsed": false - } + "g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather\", {})\n", + "g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather Part II\", {})\n", + "g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather Coda The Death of Michael Corleone\", {})\n", + "g.addEdge(\"ActedIn\", \"1:Robert De Niro\", \"2:The Godfather Part II\", {})" + ] }, { "cell_type": "markdown", + "id": "5b8f7788", + "metadata": {}, "source": [ "## Creating `HugeGraphQAChain`\n", "\n", "We can now create the `HugeGraph` and `HugeGraphQAChain`. To create the `HugeGraph` we simply need to pass the database object to the `HugeGraph` constructor." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 27, + "id": "f1f68fcf", "metadata": { "is_executing": true }, @@ -138,21 +154,23 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 28, + "id": "b86ebfa7", "metadata": {}, "outputs": [], "source": [ "graph = HugeGraph(\n", - " username=\"default\",\n", - " password=\"default\",\n", - " address=\"address\",\n", - " port=8081,\n", - " graph=\"graph\"\n", + " username=\"admin\",\n", + " password=\"admin\",\n", + " address=\"localhost\",\n", + " port=8080,\n", + " graph=\"hugegraph\"\n", ")" ] }, { "cell_type": "markdown", + "id": "e262540b", "metadata": {}, "source": [ "## Refresh graph schema information\n", @@ -162,7 +180,8 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 29, + "id": "134dd8d6", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +190,8 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 30, + "id": "e78b8e72", "metadata": { "ExecuteTime": {} }, @@ -180,7 +200,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Node properties: [name: Movie, primary_keys: ['name'], properties: ['name'], name: Person, primary_keys: ['name'], properties: ['name', 'birthDate']]\n", + "Node properties: [name: Person, primary_keys: ['name'], properties: ['name', 'birthDate'], name: Movie, primary_keys: ['name'], properties: ['name']]\n", "Edge properties: [name: ActedIn, properties: []]\n", "Relationships: ['Person--ActedIn-->Movie']\n", "\n" @@ -193,6 +213,7 @@ }, { "cell_type": "markdown", + "id": "5c27e813", "metadata": {}, "source": [ "## Querying the graph\n", @@ -202,7 +223,8 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 31, + "id": "3b23dead", "metadata": {}, "outputs": [], "source": [ @@ -213,7 +235,8 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 32, + "id": "76aecc93", "metadata": {}, "outputs": [ { @@ -222,23 +245,22 @@ "text": [ "\n", "\n", - "\u001B[1m> Entering new HugeGraphQAChain chain...\u001B[0m\n", - "Generated Gremlin:\n", - "\u001B[32;1m\u001B[1;3mg.V().has(\"name\", \"The Godfather\").in(\"ActedIn\")", - "\u001B[0m\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "Generated gremlin:\n", + "\u001b[32;1m\u001b[1;3mg.V().has('Movie', 'name', 'The Godfather').in('ActedIn').valueMap(true)\u001b[0m\n", "Full Context:\n", - "\u001B[32;1m\u001B[1;3m[{'id':'12:Al Pacino','label':'Person','properties':{'name':'Al Pacino','birthDate':'1940-04-25'}}]\u001B[0m\n", + "\u001b[32;1m\u001b[1;3m[{'id': '1:Al Pacino', 'label': 'Person', 'name': ['Al Pacino'], 'birthDate': ['1940-04-25']}]\u001b[0m\n", "\n", - "\u001B[1m> Finished chain.\u001B[0m\n" + "\u001b[1m> Finished chain.\u001b[0m\n" ] }, { "data": { "text/plain": [ - "'Al Pacino played in The Godfather.''" + "'Al Pacino played in The Godfather.'" ] }, - "execution_count": 7, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -246,13 +268,21 @@ "source": [ "chain.run(\"Who played in The Godfather?\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "869f0258", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "venv", "language": "python", - "name": "python3" + "name": "venv" }, "language_info": { "codemirror_mode": { @@ -264,7 +294,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.11.3" } }, "nbformat": 4, From f8f1d795f119b30443d9cde19f9717b500220fe8 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Tue, 4 Jul 2023 09:54:44 -0600 Subject: [PATCH 6/6] rm --- pyproject.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 923140974cc42..a34e56a9d67a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,7 +114,6 @@ openllm = {version = ">=0.1.19", optional = true} streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"} psychicapi = {version = "^0.8.0", optional = true} cassio = {version = "^0.0.7", optional = true} -hugegraph-python = {version = "^1.0.0.12", optional = true} [tool.poetry.group.docs.dependencies] autodoc_pydantic = "^1.8.0" @@ -311,7 +310,6 @@ all = [ "awadb", "esprima", "octoai-sdk", - "hugegraph-python", ] # An extra used to be able to add extended testing.