From c65f906b4609160e7c2791ff51915b0ccae8b558 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Thu, 29 Jun 2023 07:32:27 -0700 Subject: [PATCH 1/3] Create stable datasets api from experimental --- python/cugraph/cugraph/__init__.py | 2 + python/cugraph/cugraph/datasets/__init__.py | 71 ++++ python/cugraph/cugraph/datasets/dataset.py | 312 ++++++++++++++++++ .../cugraph/datasets/datasets_config.yaml | 5 + .../cugraph/datasets/metadata/__init__.py | 0 .../cugraph/datasets/metadata/cyber.yaml | 22 ++ .../cugraph/datasets/metadata/dolphins.yaml | 25 ++ .../datasets/metadata/email-Eu-core.yaml | 22 ++ .../datasets/metadata/karate-disjoint.yaml | 22 ++ .../cugraph/datasets/metadata/karate.yaml | 24 ++ .../datasets/metadata/karate_asymmetric.yaml | 24 ++ .../datasets/metadata/karate_data.yaml | 22 ++ .../datasets/metadata/karate_undirected.yaml | 22 ++ .../datasets/metadata/ktruss_polbooks.yaml | 23 ++ .../cugraph/datasets/metadata/netscience.yaml | 22 ++ .../cugraph/datasets/metadata/polbooks.yaml | 22 ++ .../cugraph/datasets/metadata/small_line.yaml | 22 ++ .../cugraph/datasets/metadata/small_tree.yaml | 22 ++ .../cugraph/datasets/metadata/toy_graph.yaml | 22 ++ .../metadata/toy_graph_undirected.yaml | 22 ++ 20 files changed, 728 insertions(+) create mode 100644 python/cugraph/cugraph/datasets/__init__.py create mode 100644 python/cugraph/cugraph/datasets/dataset.py create mode 100644 python/cugraph/cugraph/datasets/datasets_config.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/__init__.py create mode 100644 python/cugraph/cugraph/datasets/metadata/cyber.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/dolphins.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/email-Eu-core.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/karate-disjoint.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/karate.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/karate_asymmetric.yaml create mode 
100644 python/cugraph/cugraph/datasets/metadata/karate_data.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/karate_undirected.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/ktruss_polbooks.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/netscience.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/polbooks.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/small_line.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/small_tree.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/toy_graph.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/toy_graph_undirected.yaml diff --git a/python/cugraph/cugraph/__init__.py b/python/cugraph/cugraph/__init__.py index 3b9c4e007e2..4be27991823 100644 --- a/python/cugraph/cugraph/__init__.py +++ b/python/cugraph/cugraph/__init__.py @@ -120,4 +120,6 @@ from cugraph import exceptions +from cugraph import datasets + __version__ = "23.08.00" diff --git a/python/cugraph/cugraph/datasets/__init__.py b/python/cugraph/cugraph/datasets/__init__.py new file mode 100644 index 00000000000..d83c859c419 --- /dev/null +++ b/python/cugraph/cugraph/datasets/__init__.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from pathlib import Path + +# datasets module +from cugraph.datasets.dataset import ( + Dataset, + load_all, + set_download_dir, + get_download_dir, + default_download_dir +) +from cugraph.datasets import metadata + +# metadata path for .yaml files +meta_path = Path(__file__).parent / "metadata" + +# individual datasets +karate = Dataset(meta_path / "karate.yaml") +karate_data = Dataset(meta_path / "karate_data.yaml") +karate_undirected = Dataset(meta_path / "karate_undirected.yaml") +karate_asymmetric = Dataset(meta_path / "karate_asymmetric.yaml") +karate_disjoint = Dataset(meta_path / "karate-disjoint.yaml") +dolphins = Dataset(meta_path / "dolphins.yaml") +polbooks = Dataset(meta_path / "polbooks.yaml") +netscience = Dataset(meta_path / "netscience.yaml") +cyber = Dataset(meta_path / "cyber.yaml") +small_line = Dataset(meta_path / "small_line.yaml") +small_tree = Dataset(meta_path / "small_tree.yaml") +toy_graph = Dataset(meta_path / "toy_graph.yaml") +toy_graph_undirected = Dataset(meta_path / "toy_graph_undirected.yaml") +email_Eu_core = Dataset(meta_path / "email-Eu-core.yaml") +ktruss_polbooks = Dataset(meta_path / "ktruss_polbooks.yaml") + +# batches +DATASETS_UNDIRECTED = [karate, dolphins] + +DATASETS_UNDIRECTED_WEIGHTS = [netscience] + +DATASETS_UNRENUMBERED = [karate_disjoint] + +DATASETS = [dolphins, netscience, karate_disjoint] + +DATASETS_SMALL = [karate, dolphins, polbooks] + +STRONGDATASETS = [dolphins, netscience, email_Eu_core] + +DATASETS_KTRUSS = [(polbooks, ktruss_polbooks)] + +MEDIUM_DATASETS = [polbooks] + +SMALL_DATASETS = [karate, dolphins, netscience] + +RLY_SMALL_DATASETS = [small_line, small_tree] + +ALL_DATASETS = [karate, dolphins, netscience, polbooks, small_line, small_tree] + +ALL_DATASETS_WGT = [karate, dolphins, netscience, polbooks, small_line, small_tree] + +TEST_GROUP = [dolphins, netscience] diff --git a/python/cugraph/cugraph/datasets/dataset.py b/python/cugraph/cugraph/datasets/dataset.py new file mode 100644 index 
00000000000..6b395d50fef --- /dev/null +++ b/python/cugraph/cugraph/datasets/dataset.py @@ -0,0 +1,312 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cudf +import yaml +import os +from pathlib import Path +from cugraph.structure.graph_classes import Graph + + +class DefaultDownloadDir: + """ + Maintains the path to the download directory used by Dataset instances. + Instances of this class are typically shared by several Dataset instances + in order to allow for the download directory to be defined and updated by + a single object. + """ + + def __init__(self): + self._path = Path( + os.environ.get("RAPIDS_DATASET_ROOT_DIR", Path.home() / ".cugraph/datasets") + ) + + @property + def path(self): + """ + If `path` is not set, set it to the environment variable + RAPIDS_DATASET_ROOT_DIR. If the variable is not set, default to the + user's home directory. + """ + if self._path is None: + self._path = Path( + os.environ.get( + "RAPIDS_DATASET_ROOT_DIR", Path.home() / ".cugraph/datasets" + ) + ) + return self._path + + @path.setter + def path(self, new): + self._path = Path(new) + + def clear(self): + self._path = None + + +default_download_dir = DefaultDownloadDir() + + +class Dataset: + """ + A Dataset Object, used to easily import edgelist data and cuGraph.Graph + instances. 
+ + Parameters + ---------- + metadata_yaml_file : yaml file + The metadata file for the specific graph dataset, which includes + information on the name, type, url link, data loading format, graph + properties + """ + + def __init__( + self, + metadata_yaml_file=None, + csv_file=None, + csv_header=None, + csv_delim=" ", + csv_col_names=None, + csv_col_dtypes=None, + ): + self._metadata_file = None + self._dl_path = default_download_dir + self._edgelist = None + self._path = None + + if metadata_yaml_file is not None and csv_file is not None: + raise ValueError("cannot specify both metadata_yaml_file and csv_file") + + elif metadata_yaml_file is not None: + with open(metadata_yaml_file, "r") as file: + self.metadata = yaml.safe_load(file) + self._metadata_file = Path(metadata_yaml_file) + + elif csv_file is not None: + if csv_col_names is None or csv_col_dtypes is None: + raise ValueError( + "csv_col_names and csv_col_dtypes must both be " + "not None when csv_file is specified." + ) + self._path = Path(csv_file) + if self._path.exists() is False: + raise FileNotFoundError(csv_file) + self.metadata = { + "name": self._path.with_suffix("").name, + "file_type": ".csv", + "url": None, + "header": csv_header, + "delim": csv_delim, + "col_names": csv_col_names, + "col_types": csv_col_dtypes, + } + + else: + raise ValueError("must specify either metadata_yaml_file or csv_file") + + def __str__(self): + """ + Use the basename of the meta_data_file the instance was constructed with, + without any extension, as the string repr. + """ + # The metadata file is likely to have a more descriptive file name, so + # use that one first if present. 
+ # FIXME: this may need to provide a more unique or descriptive string repr + if self._metadata_file is not None: + return self._metadata_file.with_suffix("").name + else: + return self.get_path().with_suffix("").name + + def __download_csv(self, url): + """ + Downloads the .csv file from url to the current download path + (self._dl_path), updates self._path with the full path to the + downloaded file, and returns the latest value of self._path. + """ + self._dl_path.path.mkdir(parents=True, exist_ok=True) + + filename = self.metadata["name"] + self.metadata["file_type"] + if self._dl_path.path.is_dir(): + df = cudf.read_csv(url) + self._path = self._dl_path.path / filename + df.to_csv(self._path, index=False) + + else: + raise RuntimeError( + f"The directory {self._dl_path.path.absolute()}" "does not exist" + ) + return self._path + + def unload(self): + + """ + Remove all saved internal objects, forcing them to be re-created when + accessed. + + NOTE: This will cause calls to get_*() to re-read the dataset file from + disk. The caller should ensure the file on disk has not moved/been + deleted/changed. + """ + self._edgelist = None + + def get_edgelist(self, fetch=False): + """ + Return an Edgelist + + Parameters + ---------- + fetch : Boolean (default=False) + Automatically fetch for the dataset from the 'url' location within + the YAML file. + """ + if self._edgelist is None: + full_path = self.get_path() + if not full_path.is_file(): + if fetch: + full_path = self.__download_csv(self.metadata["url"]) + else: + raise RuntimeError( + f"The datafile {full_path} does not" + " exist. 
Try get_edgelist(fetch=True)" + " to download the datafile" + ) + header = None + if isinstance(self.metadata["header"], int): + header = self.metadata["header"] + self._edgelist = cudf.read_csv( + full_path, + delimiter=self.metadata["delim"], + names=self.metadata["col_names"], + dtype=self.metadata["col_types"], + header=header, + ) + + return self._edgelist + + def get_graph( + self, + fetch=False, + create_using=Graph, + ignore_weights=False, + store_transposed=False, + ): + """ + Return a Graph object. + + Parameters + ---------- + fetch : Boolean (default=False) + Downloads the dataset from the web. + + create_using: cugraph.Graph (instance or class), optional + (default=Graph) + Specify the type of Graph to create. Can pass in an instance to + create a Graph instance with specified 'directed' attribute. + + ignore_weights : Boolean (default=False) + Ignores weights in the dataset if True, resulting in an + unweighted Graph. If False (the default), weights from the + dataset -if present- will be applied to the Graph. If the + dataset does not contain weights, the Graph returned will + be unweighted regardless of ignore_weights. 
+ """ + if self._edgelist is None: + self.get_edgelist(fetch) + + if create_using is None: + G = Graph() + elif isinstance(create_using, Graph): + # what about BFS if transposed is True + attrs = {"directed": create_using.is_directed()} + G = type(create_using)(**attrs) + elif type(create_using) is type: + G = create_using() + else: + raise TypeError( + "create_using must be a cugraph.Graph " + "(or subclass) type or instance, got: " + f"{type(create_using)}" + ) + + if len(self.metadata["col_names"]) > 2 and not (ignore_weights): + G.from_cudf_edgelist( + self._edgelist, + source="src", + destination="dst", + edge_attr="wgt", + store_transposed=store_transposed, + ) + else: + G.from_cudf_edgelist( + self._edgelist, + source="src", + destination="dst", + store_transposed=store_transposed, + ) + return G + + def get_path(self): + """ + Returns the location of the stored dataset file + """ + if self._path is None: + self._path = self._dl_path.path / ( + self.metadata["name"] + self.metadata["file_type"] + ) + + return self._path.absolute() + + +def load_all(force=False): + """ + Looks in `metadata` directory and fetches all datafiles from the URLs + provided in each YAML file. + + Parameters + force : Boolean (default=False) + Overwrite any existing copies of datafiles. 
+ """ + default_download_dir.path.mkdir(parents=True, exist_ok=True) + + meta_path = Path(__file__).parent.absolute() / "metadata" + for file in meta_path.iterdir(): + meta = None + if file.suffix == ".yaml": + with open(meta_path / file, "r") as metafile: + meta = yaml.safe_load(metafile) + + if "url" in meta: + filename = meta["name"] + meta["file_type"] + save_to = default_download_dir.path / filename + if not save_to.is_file() or force: + df = cudf.read_csv(meta["url"]) + df.to_csv(save_to, index=False) + + +def set_download_dir(path): + """ + Set the download directory for fetching datasets + + Parameters + ---------- + path : String + Location used to store datafiles + """ + if path is None: + default_download_dir.clear() + else: + default_download_dir.path = path + + +def get_download_dir(): + return default_download_dir.path.absolute() diff --git a/python/cugraph/cugraph/datasets/datasets_config.yaml b/python/cugraph/cugraph/datasets/datasets_config.yaml new file mode 100644 index 00000000000..69a79db9cd9 --- /dev/null +++ b/python/cugraph/cugraph/datasets/datasets_config.yaml @@ -0,0 +1,5 @@ +--- +fetch: "False" +force: "False" +# path where datasets will be downloaded to and stored +download_dir: "datasets" diff --git a/python/cugraph/cugraph/datasets/metadata/__init__.py b/python/cugraph/cugraph/datasets/metadata/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cugraph/cugraph/datasets/metadata/cyber.yaml b/python/cugraph/cugraph/datasets/metadata/cyber.yaml new file mode 100644 index 00000000000..93ab5345442 --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/cyber.yaml @@ -0,0 +1,22 @@ +name: cyber +file_type: .csv +author: N/A +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/cyber.csv +refs: N/A +col_names: + - idx + - srcip + - dstip +col_types: + - int32 + - str + - str +delim: "," +header: 0 +has_loop: true +is_directed: true +is_multigraph: false +is_symmetric: false 
+number_of_edges: 2546575 +number_of_nodes: 706529 +number_of_lines: 2546576 diff --git a/python/cugraph/cugraph/datasets/metadata/dolphins.yaml b/python/cugraph/cugraph/datasets/metadata/dolphins.yaml new file mode 100644 index 00000000000..e4951375321 --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/dolphins.yaml @@ -0,0 +1,25 @@ +name: dolphins +file_type: .csv +author: D. Lusseau +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/dolphins.csv +refs: + D. Lusseau, K. Schneider, O. J. Boisseau, P. Haase, E. Slooten, and S. M. Dawson, + The bottlenose dolphin community of Doubtful Sound features a large proportion of + long-lasting associations, Behavioral Ecology and Sociobiology 54, 396-405 (2003). +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +delim: " " +header: None +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: false +number_of_edges: 318 +number_of_nodes: 62 +number_of_lines: 318 diff --git a/python/cugraph/cugraph/datasets/metadata/email-Eu-core.yaml b/python/cugraph/cugraph/datasets/metadata/email-Eu-core.yaml new file mode 100644 index 00000000000..97d0dc82ee3 --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/email-Eu-core.yaml @@ -0,0 +1,22 @@ +name: email-Eu-core +file_type: .csv +author: null +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/email-Eu-core.csv +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: false +is_multigraph: false +is_symmetric: true +number_of_edges: 25571 +number_of_nodes: 1005 +number_of_lines: 25571 diff --git a/python/cugraph/cugraph/datasets/metadata/karate-disjoint.yaml b/python/cugraph/cugraph/datasets/metadata/karate-disjoint.yaml new file mode 100644 index 00000000000..0c0eaf78b63 --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/karate-disjoint.yaml @@ -0,0 +1,22 @@ +name: 
karate-disjoint +file_type: .csv +author: null +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/karate-disjoint.csv +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: True +is_multigraph: false +is_symmetric: true +number_of_edges: 312 +number_of_nodes: 68 +number_of_lines: 312 diff --git a/python/cugraph/cugraph/datasets/metadata/karate.yaml b/python/cugraph/cugraph/datasets/metadata/karate.yaml new file mode 100644 index 00000000000..273381ed368 --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/karate.yaml @@ -0,0 +1,24 @@ +name: karate +file_type: .csv +author: Zachary W. +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/karate.csv +refs: + W. W. Zachary, An information flow model for conflict and fission in small groups, + Journal of Anthropological Research 33, 452-473 (1977). +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: true +is_directed: true +is_multigraph: false +is_symmetric: true +number_of_edges: 156 +number_of_nodes: 34 +number_of_lines: 156 diff --git a/python/cugraph/cugraph/datasets/metadata/karate_asymmetric.yaml b/python/cugraph/cugraph/datasets/metadata/karate_asymmetric.yaml new file mode 100644 index 00000000000..3616b8fb3a5 --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/karate_asymmetric.yaml @@ -0,0 +1,24 @@ +name: karate-asymmetric +file_type: .csv +author: Zachary W. +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/karate-asymmetric.csv +delim: " " +header: None +refs: + W. W. Zachary, An information flow model for conflict and fission in small groups, + Journal of Anthropological Research 33, 452-473 (1977). 
+col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: true +is_directed: false +is_multigraph: false +is_symmetric: false +number_of_edges: 78 +number_of_nodes: 34 +number_of_lines: 78 diff --git a/python/cugraph/cugraph/datasets/metadata/karate_data.yaml b/python/cugraph/cugraph/datasets/metadata/karate_data.yaml new file mode 100644 index 00000000000..9a8b27f21ae --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/karate_data.yaml @@ -0,0 +1,22 @@ +name: karate-data +file_type: .csv +author: Zachary W. +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/karate-data.csv +refs: + W. W. Zachary, An information flow model for conflict and fission in small groups, + Journal of Anthropological Research 33, 452-473 (1977). +delim: "\t" +header: None +col_names: + - src + - dst +col_types: + - int32 + - int32 +has_loop: true +is_directed: true +is_multigraph: false +is_symmetric: true +number_of_edges: 156 +number_of_nodes: 34 +number_of_lines: 156 diff --git a/python/cugraph/cugraph/datasets/metadata/karate_undirected.yaml b/python/cugraph/cugraph/datasets/metadata/karate_undirected.yaml new file mode 100644 index 00000000000..1b45f86caee --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/karate_undirected.yaml @@ -0,0 +1,22 @@ +name: karate_undirected +file_type: .csv +author: Zachary W. +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/karate_undirected.csv +refs: + W. W. Zachary, An information flow model for conflict and fission in small groups, + Journal of Anthropological Research 33, 452-473 (1977). 
+delim: "\t" +header: None +col_names: + - src + - dst +col_types: + - int32 + - int32 +has_loop: true +is_directed: false +is_multigraph: false +is_symmetric: true +number_of_edges: 78 +number_of_nodes: 34 +number_of_lines: 78 diff --git a/python/cugraph/cugraph/datasets/metadata/ktruss_polbooks.yaml b/python/cugraph/cugraph/datasets/metadata/ktruss_polbooks.yaml new file mode 100644 index 00000000000..1ef29b3917e --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/ktruss_polbooks.yaml @@ -0,0 +1,23 @@ +name: ktruss_polbooks +file_type: .csv +author: null +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/ref/ktruss/polbooks.csv +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: false +number_of_edges: 233 +number_of_nodes: 58 +number_of_lines: 233 + diff --git a/python/cugraph/cugraph/datasets/metadata/netscience.yaml b/python/cugraph/cugraph/datasets/metadata/netscience.yaml new file mode 100644 index 00000000000..2dca702df3d --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/netscience.yaml @@ -0,0 +1,22 @@ +name: netscience +file_type: .csv +author: Newman, Mark EJ +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/netscience.csv +refs: Finding community structure in networks using the eigenvectors of matrices. 
+delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: true +number_of_edges: 2742 +number_of_nodes: 1461 +number_of_lines: 5484 diff --git a/python/cugraph/cugraph/datasets/metadata/polbooks.yaml b/python/cugraph/cugraph/datasets/metadata/polbooks.yaml new file mode 100644 index 00000000000..5816e5672fd --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/polbooks.yaml @@ -0,0 +1,22 @@ +name: polbooks +file_type: .csv +author: V. Krebs +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/polbooks.csv +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +is_directed: true +has_loop: null +is_multigraph: null +is_symmetric: true +number_of_edges: 882 +number_of_nodes: 105 +number_of_lines: 882 diff --git a/python/cugraph/cugraph/datasets/metadata/small_line.yaml b/python/cugraph/cugraph/datasets/metadata/small_line.yaml new file mode 100644 index 00000000000..5b724ac99fd --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/small_line.yaml @@ -0,0 +1,22 @@ +name: small_line +file_type: .csv +author: null +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/small_line.csv +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: false +is_multigraph: false +is_symmetric: true +number_of_edges: 9 +number_of_nodes: 10 +number_of_lines: 8 diff --git a/python/cugraph/cugraph/datasets/metadata/small_tree.yaml b/python/cugraph/cugraph/datasets/metadata/small_tree.yaml new file mode 100644 index 00000000000..8eeac346d2a --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/small_tree.yaml @@ -0,0 +1,22 @@ +name: small_tree +file_type: .csv +author: null +url: 
https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/small_tree.csv +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: true +number_of_edges: 11 +number_of_nodes: 9 +number_of_lines: 11 diff --git a/python/cugraph/cugraph/datasets/metadata/toy_graph.yaml b/python/cugraph/cugraph/datasets/metadata/toy_graph.yaml new file mode 100644 index 00000000000..819aad06f6a --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/toy_graph.yaml @@ -0,0 +1,22 @@ +name: toy_graph +file_type: .csv +author: null +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/toy_graph.csv +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: false +is_multigraph: false +is_symmetric: true +number_of_edges: 16 +number_of_nodes: 6 +number_of_lines: 16 diff --git a/python/cugraph/cugraph/datasets/metadata/toy_graph_undirected.yaml b/python/cugraph/cugraph/datasets/metadata/toy_graph_undirected.yaml new file mode 100644 index 00000000000..c6e86bdf334 --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/toy_graph_undirected.yaml @@ -0,0 +1,22 @@ +name: toy_graph_undirected +file_type: .csv +author: null +url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/toy_graph_undirected.csv +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: false +is_multigraph: false +is_symmetric: true +number_of_edges: 8 +number_of_nodes: 6 +number_of_lines: 8 From 5dbca1422ac0a688f90f3ad1b10ec27179ea0c98 Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Thu, 29 Jun 2023 07:43:17 -0700 Subject: [PATCH 2/3] add promoted_experimental_warning_wrapper to dataset --- python/cugraph/cugraph/experimental/__init__.py | 2 
++ 1 file changed, 2 insertions(+) diff --git a/python/cugraph/cugraph/experimental/__init__.py b/python/cugraph/cugraph/experimental/__init__.py index 2adfb84868e..61093f46058 100644 --- a/python/cugraph/cugraph/experimental/__init__.py +++ b/python/cugraph/cugraph/experimental/__init__.py @@ -50,6 +50,8 @@ from cugraph.experimental.datasets.dataset import Dataset +Dataset = promoted_experimental_warning_wrapper(Dataset) + from cugraph.experimental.link_prediction.jaccard import ( EXPERIMENTAL__jaccard, EXPERIMENTAL__jaccard_coefficient, From 4182d2e5c5b0a28b08d6e29df8c92528d1cc8c0b Mon Sep 17 00:00:00 2001 From: Ralph Liu Date: Thu, 29 Jun 2023 08:41:20 -0700 Subject: [PATCH 3/3] pre-commit style fix --- python/cugraph/cugraph/datasets/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/datasets/__init__.py b/python/cugraph/cugraph/datasets/__init__.py index d83c859c419..d95a7683e1b 100644 --- a/python/cugraph/cugraph/datasets/__init__.py +++ b/python/cugraph/cugraph/datasets/__init__.py @@ -19,7 +19,7 @@ load_all, set_download_dir, get_download_dir, - default_download_dir + default_download_dir, ) from cugraph.datasets import metadata