diff --git a/.gitignore b/.gitignore index 8383e6b..1bb53a8 100644 --- a/.gitignore +++ b/.gitignore @@ -129,4 +129,4 @@ dmypy.json .pyre/ # pdm -.pdm.toml +.pdm-python diff --git a/.pdm.toml b/.pdm.toml deleted file mode 100644 index 639c763..0000000 --- a/.pdm.toml +++ /dev/null @@ -1,2 +0,0 @@ -[python] -path = "/Users/weyl/dev/ngnx/.venv/bin/python" diff --git a/README.md b/README.md index 944567d..0ec69a2 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,19 @@
- Manipulation of graphs in NebulaGraph using the NetworkX API. + Manipulate and analyze NebulaGraph data using the NetworkX API
--- @@ -31,13 +24,15 @@ --- -NebulaGraph NetworkX (ng_nx) is a tool that allows you to use the NetworkX API for manipulating graphs in NebulaGraph. It makes it easy to analyze and manipulate graphs using NebulaGraph's advanced capabilities while still using the familiar NetworkX interface. In short, ng_nx bridges the gap between NebulaGraph and NetworkX. +NebulaGraph NetworkX (ng_nx) is a powerful tool that bridges NebulaGraph and NetworkX, enabling you to leverage NetworkX's rich set of graph algorithms and analysis tools on data stored in NebulaGraph. This integration combines NebulaGraph's advanced storage capabilities with NetworkX's extensive graph analysis functionality. ## Quick Start -Prepare for a NebulaGraph cluster within Colab in 5 mins following https://github.com/nebula-contrib/nebulagraph-lite. +### Prerequisites + +Ensure you have a NebulaGraph cluster running. For a quick setup, you can use [NebulaGraph Lite](https://github.com/nebula-contrib/nebulagraph-lite) to set up a cluster in Colab within 5 minutes. -### Install +### Installation ```bash pip install ng_nx @@ -123,7 +118,41 @@ louvain_writer.set_options( louvain_writer.write() ``` +### Using NebulaQueryReader + +The `NebulaQueryReader` allows you to execute any NebulaGraph query and construct a NetworkX graph from the result. + +```python +from ng_nx import NebulaQueryReader +from ng_nx.utils import NebulaGraphConfig + +config = NebulaGraphConfig( + space="demo_basketballplayer", + graphd_hosts="127.0.0.1:9669", + metad_hosts="127.0.0.1:9559" +) + +reader = NebulaQueryReader(nebula_config=config) + +# Execute a custom query +query = "MATCH p=(v:player{name:'Tim Duncan'})-[e:follow*1..3]->(v2) RETURN p" +g = reader.read(query) +``` + +This approach allows you to leverage the full power of NebulaGraph's query language while still being able to analyze the results using NetworkX. + +## Readers + +NG-NX provides three types of readers to fetch data from NebulaGraph: + +1. `NebulaReader`: Reads a graph from NebulaGraph based on specified edges and properties, returning a NetworkX graph. It uses the MATCH clause internally to fetch data from NebulaGraph. + +2. `NebulaQueryReader`: Executes a custom NebulaGraph query and constructs a NetworkX graph from the result. This reader is particularly useful when you need to perform complex queries or have specific data retrieval requirements. + +3. `NebulaScanReader` (Coming soon): Will read graph data from NebulaGraph using a configuration similar to `NebulaReader`, but it will bypass the MATCH clause and utilize the SCAN interface with the Storage Client for potentially improved performance on large datasets. + +Each reader is designed to cater to different use cases, providing flexibility in how you interact with and retrieve data from NebulaGraph for analysis with NetworkX. ## Documentation -[API Reference](https://github.com/wey-gu/nebulagraph-nx/blob/main/docs/API.md) +[API Reference](https://github.com/wey-gu/nebulagraph-nx/blob/main/docs/API.md) \ No newline at end of file diff --git a/ng_nx/__init__.py b/ng_nx/__init__.py index 06194ca..24461ac 100644 --- a/ng_nx/__init__.py +++ b/ng_nx/__init__.py @@ -1,11 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright 2023 The NebulaGraph Authors. All rights reserved. +# Copyright 2024 The NebulaGraph Authors. All rights reserved. from pkgutil import extend_path __path__ = extend_path(__path__, __name__) # type: ignore -from ng_nx.query_reader import NebulaReader +from ng_nx.query_reader import NebulaReader, NebulaQueryReader from ng_nx.scan_reader import NebulaScanReader from ng_nx.writer import NebulaWriter @@ -14,4 +14,5 @@ "NebulaReader", "NebulaScanReader", "NebulaWriter", + "NebulaQueryReader", ) diff --git a/ng_nx/query_reader.py b/ng_nx/query_reader.py index 9ab6427..e202942 100644 --- a/ng_nx/query_reader.py +++ b/ng_nx/query_reader.py @@ -1,10 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright 2023 The NebulaGraph Authors. All rights reserved. +# Copyright 2024 The NebulaGraph Authors. All rights reserved. import networkx as nx import pandas as pd from nebula3.Config import Config from nebula3.gclient.net import ConnectionPool +from nebula3.data.ResultSet import ResultSet from ng_nx.utils import NebulaGraphConfig, result_to_df @@ -76,3 +77,54 @@ def release(self): def __del__(self): self.release() + +class NebulaQueryReader: + def __init__(self, nebula_config: NebulaGraphConfig): + self.config = nebula_config + self.connection_pool = ConnectionPool() + graphd_hosts = nebula_config.graphd_hosts.split(",") + graphd_host_list = [ + (host.split(":")[0], int(host.split(":")[1])) for host in graphd_hosts + ] + config = Config() + assert self.connection_pool.init( + graphd_host_list, config + ), "Init Connection Pool Failed" + + def read(self, query: str) -> nx.MultiDiGraph: + with self.connection_pool.session_context( + self.config.user, self.config.password + ) as session: + assert session.execute( + f"USE {self.config.space}" + ).is_succeeded(), f"Failed to use space {self.config.space}" + + result: ResultSet = session.execute(query) + assert result.is_succeeded(), f"Query execution failed: {result.error_msg()}" + + vis_data = result.dict_for_vis() + return self._construct_graph(vis_data) + + def _construct_graph(self, vis_data: dict) -> nx.MultiDiGraph: + g = nx.MultiDiGraph() + + # Add nodes + for node_data in vis_data['nodes']: + g.add_node(node_data['id'], **node_data['props'], labels=node_data['labels']) + + # Add edges + for edge_data in vis_data['edges']: + g.add_edge( + edge_data['src'], + edge_data['dst'], + key=edge_data['name'], + **edge_data['props'] + ) + + return g + + def release(self): + self.connection_pool.close() + + def __del__(self): + self.release() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index dee1bdc..8de90f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,8 +79,8 @@ filter_files = true [project] name = "ng_nx" -version = "0.1.9" -description = "NebulaGraph NetowrkX adaptor" +version = "0.2.0" +description = "NebulaGraph NetowrkX Adaptor" authors = [ {name = "Wey Gu", email = "weyl.gu@gmail.com"}, ] @@ -88,7 +88,7 @@ authors = [ # ng_ai need to work with pyspark 2.4.x, which only support py37 dependencies = [ "networkx>=2.5.1", - "nebula3-python>=3.4.0", + "nebula3-python>=3.8.2", "pandas>=1.3.5", "numpy>=1.21.6", "scipy>=1.7.3",