From 5518b836d2b424bde990cc6d190f208a69adee3a Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Tue, 11 Aug 2020 00:00:06 -0500 Subject: [PATCH 1/3] max_vid --- CHANGELOG.md | 1 + python/cugraph/dask/common/input_utils.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 961ccc67944..f2e2ba843d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ - PR #1031 MG notebook - PR #1034 Expose resolution (gamma) parameter in Louvain - PR #1041 Use S3 bucket directly for benchmark plugin +- PR #1062 Compute max_vertex_id in mnmg local data computation ## Bug Fixes - PR #936 Update Force Atlas 2 doc and wrapper diff --git a/python/cugraph/dask/common/input_utils.py b/python/cugraph/dask/common/input_utils.py index ab038507b78..9bad0e62180 100644 --- a/python/cugraph/dask/common/input_utils.py +++ b/python/cugraph/dask/common/input_utils.py @@ -55,6 +55,7 @@ def __init__(self, gpu_futures=None, workers=None, self.multiple = multiple self.worker_info = None self.total_rows = None + self.max_vertex_id = None self.ranks = None self.parts_to_sizes = None self.local_data = None @@ -148,6 +149,7 @@ def calculate_local_data(self, comms, by): _local_data_dict = self.client.compute(local_data, sync=True) local_data_dict = {'edges': [], 'offsets': [], 'verts': []} + max_vid = 0 for rank in range(len(_local_data_dict)): data = _local_data_dict[rank] local_data_dict['edges'].append(data[0]) @@ -158,6 +160,8 @@ def calculate_local_data(self, comms, by): local_offset = prev_data[1] + 1 local_data_dict['offsets'].append(local_offset) local_data_dict['verts'].append(data[1] - local_offset + 1) + if data[2] > max_vid: + max_vid = data[2] import numpy as np local_data_dict['edges'] = np.array(local_data_dict['edges'], @@ -167,6 +171,7 @@ def calculate_local_data(self, comms, by): local_data_dict['verts'] = np.array(local_data_dict['verts'], dtype=np.int32) self.local_data = local_data_dict + self.max_vertex_id = max_vid """ Internal methods, API subject to change """ @@ -196,8 +201,9 @@ def get_obj(x): return x[0] if multiple else x def _get_local_data(df, by): df = df[0] num_local_edges = len(df) - local_max = df[by].iloc[-1] - return num_local_edges, local_max + local_by_max = df[by].iloc[-1] + local_max = df.max.max() + return num_local_edges, local_by_max, local_max def get_local_data(input_graph, by, load_balance=True): From ab5ab3bbc6af7b7140c7a740eeb5c86bb34f5617 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Tue, 11 Aug 2020 00:05:13 -0500 Subject: [PATCH 2/3] fix max call --- python/cugraph/dask/common/input_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/dask/common/input_utils.py b/python/cugraph/dask/common/input_utils.py index 9bad0e62180..8a1e55ba235 100644 --- a/python/cugraph/dask/common/input_utils.py +++ b/python/cugraph/dask/common/input_utils.py @@ -202,7 +202,7 @@ def _get_local_data(df, by): df = df[0] num_local_edges = len(df) local_by_max = df[by].iloc[-1] - local_max = df.max.max() + local_max = df.max().max() return num_local_edges, local_by_max, local_max From b5aff6825b44f176fa939413bf764ed6dc831a30 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Tue, 11 Aug 2020 00:22:44 -0500 Subject: [PATCH 3/3] fix max call --- python/cugraph/dask/common/input_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/dask/common/input_utils.py b/python/cugraph/dask/common/input_utils.py index 8a1e55ba235..ee589ab30ad 100644 --- a/python/cugraph/dask/common/input_utils.py +++ b/python/cugraph/dask/common/input_utils.py @@ -202,7 +202,7 @@ def _get_local_data(df, by): df = df[0] num_local_edges = len(df) local_by_max = df[by].iloc[-1] - local_max = df.max().max() + local_max = df[['src', 'dst']].max().max() return num_local_edges, local_by_max, local_max