From c2d1e4c84515e9adc17a60c5a1faf4bb36be2e30 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 20 Jul 2020 01:29:23 +0000 Subject: [PATCH 01/15] add nodesy --- python/dgl/transform.py | 512 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 465 insertions(+), 47 deletions(-) diff --git a/python/dgl/transform.py b/python/dgl/transform.py index e904c6e91a6a..c08eb87729c1 100644 --- a/python/dgl/transform.py +++ b/python/dgl/transform.py @@ -673,70 +673,488 @@ def metapath_reachable_graph(g, metapath): return new_g -def add_self_loop(g): - """Return a new graph containing all the edges in the input graph plus self loops - of every nodes. - No duplicate self loop will be added for nodes already having self loops. - Self-loop edges id are not preserved. All self-loop edges would be added at the end. +def add_nodes(g, num, data=None, ntype=None): + r"""Add new nodes of the same node type. + A new graph with newly added nodes is returned. + + Parameters + ---------- + num : int + Number of nodes to add. + data : dict, optional + Feature data of the added nodes. + ntype : str, optional + The type of the new nodes. Can be omitted if there is + only one node type in the graph. + + Return + ------ + DGLHeteroGraph + The graph with newly added nodes. + + Notes + ----- + + * If the key of ``data`` does not contain some existing feature fields, + those features for the new nodes will be filled with zeros). + * If the key of ``data`` contains new feature fields, those features for + the old nodes will be filled zeros). Examples - --------- + -------- - >>> g = DGLGraph() - >>> g.add_nodes(5) - >>> g.add_edges([0, 1, 2], [1, 1, 2]) - >>> new_g = dgl.transform.add_self_loop(g) # Nodes 0, 3, 4 don't have self-loop - >>> new_g.edges() - (tensor([0, 0, 1, 2, 3, 4]), tensor([1, 0, 1, 2, 3, 4])) + The following example uses PyTorch backend. + >>> import dgl + >>> import torch - Parameters - ------------ - g: DGLGraph + **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type** - Returns + >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))) + >>> g.num_nodes() + 3 + >>> g = dgl.add_nodes(g, 2) + >>> g.num_nodes() + 5 + + If the graph has some node features and new nodes are added without + features, their features will be created by initializers defined + with :func:`set_n_initializer`. + + >>> g.ndata['h'] = torch.ones(5, 1) + >>> g = dgl.add_nodes(g, 1) + >>> g.ndata['h'] + tensor([[1.], [1.], [1.], [1.], [1.], [0.]]) + + We can also assign features for the new nodes in adding new nodes. + + >>> g = dgl.add_nodes(g, 1, {'h': torch.ones(1, 1), 'w': torch.ones(1, 1)}) + >>> g.ndata['h'] + tensor([[1.], [1.], [1.], [1.], [1.], [0.], [1.]]) + + Since ``data`` contains new feature fields, the features for old nodes + will be created by initializers defined with :func:`set_n_initializer`. + + >>> g.ndata['w'] + tensor([[0.], [0.], [0.], [0.], [0.], [0.], [1.]]) + + **Heterogeneous Graphs with Multiple Node Types** + + >>> g = dgl.heterograph({ + >>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), + >>> torch.tensor([0, 0, 1, 1])), + >>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]), + >>> torch.tensor([0, 1])) + >>> }) + >>> g = dgl.add_nodes(g, 2) + DGLError: Node type name must be specified + if there are more than one node types. + >>> g.num_nodes('user') + 3 + >>> g = dgl.add_nodes(g, 2, ntype='user') + >>> g.num_nodes('user') + 5 + + See Also -------- - DGLGraph + remove_nodes + add_edges + remove_edges """ - new_g = DGLGraph() - new_g.add_nodes(g.number_of_nodes()) - src, dst = g.all_edges(order="eid") - src = F.zerocopy_to_numpy(src) - dst = F.zerocopy_to_numpy(dst) - non_self_edges_idx = src != dst - nodes = np.arange(g.number_of_nodes()) - new_g.add_edges(src[non_self_edges_idx], dst[non_self_edges_idx]) - new_g.add_edges(nodes, nodes) + # TODO(xiangsx): block do not support add_nodes + if ntype is None: + if g.number_of_ntypes() != 1: + raise DGLError('Node type name must be specified if there are more than one ' + 'node types.') + + # nothing happen + if num == 0: + return g + + assert num > 0, 'Number of new nodes should be larger than one.' + + num_nodes_dict = {} + for c_ntype in self.ntypes: + num_nodes_dict[c_ntype] = + g.number_of_nodes(c_ntype) + num if c_ntype == ntype else 0 + + graph_data = {} + for c_etype in self.canonical_etypes: + u, v = g.edges(form='uv', order='eid', etype=c_etype) + graph_data[c_etype] = (u, v) + + new_g = dgl.heterograph(graph_data, + num_nodes_dict, + idtype = g.dtype, + device = g.device) + + for c_ntype in self.ntypes: + if c_ntype == ntype: + ndata = g.nodes[c_ntype].data() + # existing features + for k, v in ndata.items(): + if data.get(k, None) is not None: + new_g.nodes[c_ntype].data[k] = + F.cat([v, utils.prepare_tensor(new_g, data[k], 'data')], dim=0) + else: + new_g.nodes[c_ntype].data[k] = v + + # non-existing features + for k, v in data: + if ndata.get(k, None) is None: + shape = F.shape(data[k]) + shape[0] = g.number_of_nodes(c_ntype) + new_g.nodes[c_ntype].data[k] = + F.cat([F.zeros(shape, F.dtype(data[k]), F.context(data[k])), + utils.prepare_tensor(new_g, data[k], 'data')], dim=0) + else: + for k, v in g.nodes[c_ntype].data: + new_g.nodes[c_ntype].data[k] = v + + for c_etype in self.canonical_etypes: + for k, v in g.edges[c_etype].data: + new_g.edges[c_etype].data[k] = v + return new_g -def remove_self_loop(g): - """Return a new graph with all self-loop edges removed +def add_edges(g, u, v, data=None, etype=None): + r"""Add multiple new edges for the specified edge type. + A new graph with newly added edges is returned. + + The i-th new edge will be from ``u[i]`` to ``v[i]``. + + Parameters + ---------- + u : int, tensor, numpy.ndarray, list + Source node IDs, ``u[i]`` gives the source node for the i-th new edge. + v : int, tensor, numpy.ndarray, list + Destination node IDs, ``v[i]`` gives the destination node for the i-th new edge. + data : dict, optional + Feature data of the added edges. The i-th row of the feature data + corresponds to the i-th new edge. + etype : str or tuple of str, optional + The type of the new edges. Can be omitted if there is + only one edge type in the graph. + + Return + ------ + DGLHeteroGraph + The graph with newly added edges. + + Notes + ----- + * If end nodes of adding edges does not exists, add_nodes is invoked + to add new nodes. The node features of the new nodes will be created + by initializers defined with :func:`set_n_initializer` (default + initializer fills zeros). In certain cases, it is recommanded to + add_nodes first and then add_edges. + * If the key of ``data`` does not contain some existing feature fields, + those features for the new edges will be created by initializers + defined with :func:`set_n_initializer` (default initializer fills zeros). + * If the key of ``data`` contains new feature fields, those features for + the old edges will be created by initializers defined with + :func:`set_n_initializer` (default initializer fills zeros). Examples - --------- + -------- - >>> g = DGLGraph() - >>> g.add_nodes(5) - >>> g.add_edges([0, 1, 2], [1, 1, 2]) - >>> new_g = dgl.transform.remove_self_loop(g) # Nodes 1, 2 have self-loop - >>> new_g.edges() - (tensor([0]), tensor([1])) + The following example uses PyTorch backend. + >>> import dgl + >>> import torch + **Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type** + >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))) + >>> g.num_edges() + 2 + >>> g = dgl.add_edges(g, torch.tensor([1, 3]), torch.tensor([0, 1])) + >>> g.num_edges() + 4 + Since ``u`` or ``v`` contains a non-existing node ID, the nodes are + added implicitly. + >>> g.num_nodes() + 4 + + If the graph has some edge features and new edges are added without + features, their features will be created by initializers defined + with :func:`set_n_initializer`. + + >>> g.edata['h'] = torch.ones(4, 1) + >>> g = dgl.add_edges(g, torch.tensor([1]), torch.tensor([1])) + >>> g.edata['h'] + tensor([[1.], [1.], [1.], [1.], [0.]]) + + We can also assign features for the new edges in adding new edges. + + >>> g = dgl.add_edges(g, torch.tensor([0, 0]), torch.tensor([2, 2]), + >>> {'h': torch.tensor([[1.], [2.]]), 'w': torch.ones(2, 1)}) + >>> g.edata['h'] + tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]]) + Since ``data`` contains new feature fields, the features for old edges + will be created by initializers defined with :func:`set_n_initializer`. + >>> g.edata['w'] + tensor([[0.], [0.], [0.], [0.], [0.], [1.], [1.]]) + + **Heterogeneous Graphs with Multiple Edge Types** + + >>> g = dgl.heterograph({ + >>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), + >>> torch.tensor([0, 0, 1, 1])), + >>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]), + >>> torch.tensor([0, 1])) + >>> }) + >>> g = dgl.add_edges(g, torch.tensor([3]), torch.tensor([3])) + DGLError: Edge type name must be specified + if there are more than one edge types. + >>> g.number_of_edges('plays') + 4 + >>> g = dgl.add_edges(g, torch.tensor([3]), torch.tensor([3]), etype='plays') + >>> g.number_of_edges('plays') + 5 + + See Also + -------- + add_nodes + remove_nodes + remove_edges + """ + +def remove_edges(g, eids, etype=None): + r"""Remove multiple edges with the specified edge type. + A new graph with certain edges deleted is returned. + + Nodes will not be removed. After removing edges, the rest + edges will be re-indexed using consecutive integers from 0, + with their relative order preserved. + The features for the removed edges will be removed accordingly. Parameters - ------------ - g: DGLGraph + ---------- + eids : int, tensor, numpy.ndarray, list + IDs for the edges to remove. + etype : str or tuple of str, optional + The type of the edges to remove. Can be omitted if there is + only one edge type in the graph. - Returns + Return + ------ + DGLHeteroGraph + The graph with edges deleted. + + Examples -------- - DGLGraph + >>> import dgl + >>> import torch + + **Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type** + + >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2]))) + >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1) + >>> g = dgl.remove_edges(g, torch.tensor([0, 1])) + >>> g + Graph(num_nodes=3, num_edges=1, + ndata_schemes={} + edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)}) + >>> g.edges('all') + (tensor([2]), tensor([2]), tensor([0])) + >>> g.edata['he'] + tensor([[2.]]) + + **Heterogeneous Graphs with Multiple Edge Types** + + >>> g = dgl.heterograph({ + >>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), + >>> torch.tensor([0, 0, 1, 1])), + >>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]), + >>> torch.tensor([0, 1])) + >>> }) + >>> g = dgl.remove_edges(g, torch.tensor([0, 1])) + DGLError: Edge type name must be specified + if there are more than one edge types. + >>> g = dgl.remove_edges(g, torch.tensor([0, 1]), 'plays') + >>> g.edges('all', etype='plays') + (tensor([0, 1]), tensor([0, 0]), tensor([0, 1])) + See Also + -------- + add_nodes + add_edges + remove_nodes """ - new_g = DGLGraph() - new_g.add_nodes(g.number_of_nodes()) - src, dst = g.all_edges(order="eid") - src = F.zerocopy_to_numpy(src) - dst = F.zerocopy_to_numpy(dst) - non_self_edges_idx = src != dst - new_g.add_edges(src[non_self_edges_idx], dst[non_self_edges_idx]) - return new_g + +def remove_nodes(g, nids, ntype=None): + r"""Remove multiple nodes with the specified node type. + A new graph with certain nodes deleted is returned. + + Edges that connect to the nodes will be removed as well. After removing + nodes and edges, the rest nodes and edges will be re-indexed using + consecutive integers from 0, with their relative order preserved. + The features for the removed nodes/edges will be removed accordingly. + + The features for the removed nodes/edges will be removed accordingly. + + Parameters + ---------- + nids : int, tensor, numpy.ndarray, list + Nodes to remove. + ntype : str, optional + The type of the nodes to remove. Can be omitted if there is + only one node type in the graph. + + Return + ------ + DGLHeteroGraph + The graph with nodes deleted. + + Examples + -------- + + >>> import dgl + >>> import torch + + **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type** + + >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2]))) + >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1) + >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1) + >>> g = dgl.remove_nodes(g, torch.tensor([0, 1])) + >>> g + Graph(num_nodes=1, num_edges=1, + ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)} + edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)}) + >>> g.ndata['hv'] + tensor([[2.]]) + >>> g.edata['he'] + tensor([[2.]]) + + **Heterogeneous Graphs with Multiple Node Types** + + >>> g = dgl.heterograph({ + >>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), + >>> torch.tensor([0, 0, 1, 1])), + >>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]), + >>> torch.tensor([0, 1])) + >>> }) + >>> g = dgl.remove_nodes(g, torch.tensor([0, 1])) + DGLError: Node type name must be specified + if there are more than one node types. + >>> g = dgl.remove_nodes(g, torch.tensor([0, 1]), ntype='game') + >>> g.num_nodes('user') + 3 + >>> g.num_nodes('game') + 0 + >>> g.num_edges('plays') + 0 + + See Also + -------- + add_nodes + add_edges + remove_edges + """ + +def add_selfloop(g): + r""" Add self loop for each node in the graph. + A new graph with self-loop is returned. + + Since **selfloop is not well defined for unidirectional + bipartite graphs**, we simply skip the nodes corresponding + to unidirectional bipartite graphs. + + Return + ------ + DGLHeteroGraph + The graph with self-loop. + + Notes + ----- + * It is recommanded to ``remove_selfloop`` before invoking + ``add_selfloop``. + * Features for the new edges (self-loop edges) will be created + by initializers defined with :func:`set_n_initializer` + (default initializer fills zeros). + + Examples + -------- + >>> import dgl + >>> import torch + + **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type** + + >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0]))) + >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1) + >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1) + >>> g = dgl.add_selfloop(g) + >>> g + Graph(num_nodes=3, num_edges=6, + ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)} + edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)}) + >>> g.edata['he'] + tensor([[0.], + [1.], + [2.], + [0.], + [0.], + [0.]]) + + **Heterogeneous Graphs with Multiple Node Types** + + >>> g = dgl.heterograph({ + ('user', 'follows', 'user'): (torch.tensor([1, 2]), + torch.tensor([0, 1])), + ('user', 'plays', 'game'): (torch.tensor([0, 1]), + torch.tensor([0, 1]))}) + >>> g = dgl.add_selfloop(g) + >>> g + Graph(num_nodes={'user': 3, 'game': 2}, + num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 5}, + metagraph=[('user', 'user'), ('user', 'game')]) + """ + +def remove_selfloop(g): + r""" Remove self loops for each node in the graph. + A new graph with self-loop removed is returned. + + If there are multiple self loops for a certain node, + all of them will be removed. + + Examples + --------- + + >>> import dgl + >>> import torch + + **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type** + + >>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2])), + idtype=idtype, device=F.ctx()) + >>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1) + >>> g = dgl.remove_selfloop(g) + >>> g + Graph(num_nodes=3, num_edges=2, + edata_schemes={'he': Scheme(shape=(2,), dtype=torch.float32)}) + >>> g.edata['he'] + tensor([[0.],[3.]]) + + **Heterogeneous Graphs with Multiple Node Types** + + >>> g = dgl.heterograph({ + >>> ('user', 'follows', 'user'): (torch.tensor([0, 1, 1, 1, 2]), + >>> torch.tensor([0, 0, 1, 1, 1])), + >>> ('user', 'plays', 'game'): (torch.tensor([0, 1]), + >>> torch.tensor([0, 1])) + >>> }) + >>> g = dgl.remove_selfloop(g) + >>> g.num_nodes('user') + 3 + >>> g.num_nodes('game') + 2 + >>> g.num_edges('follows') + 2 + >>> g.num_edges('plays') + 2 + + See Also + -------- + add_selfloop + """ + def reorder_nodes(g, new_node_ids): """ Generate a new graph with new node Ids. From f6a9db86c2d0a9a309a82d08fa8afb07a77f77cb Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 20 Jul 2020 03:34:10 +0000 Subject: [PATCH 02/15] All three --- python/dgl/heterograph.py | 2 +- python/dgl/transform.py | 255 +++++++++++++++++++++++++++----- tests/compute/test_transform.py | 78 +++++++++- 3 files changed, 291 insertions(+), 44 deletions(-) diff --git a/python/dgl/heterograph.py b/python/dgl/heterograph.py index 071ca8fb7d00..79af8e73b433 100644 --- a/python/dgl/heterograph.py +++ b/python/dgl/heterograph.py @@ -561,7 +561,7 @@ def add_edges(self, u, v, data=None, etype=None): return assert len(u) == len(v) or len(u) == 1 or len(v) == 1, \ - 'need the number of source nodes and the number of destination nodes are same, ' \ + 'The number of source nodes and the number of destination nodes should be same, ' \ 'or either the number of source nodes or the number of destination nodes is 1.' if len(u) == 1 and len(v) > 1: diff --git a/python/dgl/transform.py b/python/dgl/transform.py index c08eb87729c1..594c48eadd51 100644 --- a/python/dgl/transform.py +++ b/python/dgl/transform.py @@ -762,23 +762,19 @@ def add_nodes(g, num, data=None, ntype=None): """ # TODO(xiangsx): block do not support add_nodes if ntype is None: - if g.number_of_ntypes() != 1: + if len(g.ntypes) != 1: raise DGLError('Node type name must be specified if there are more than one ' 'node types.') - # nothing happen - if num == 0: - return g - assert num > 0, 'Number of new nodes should be larger than one.' num_nodes_dict = {} - for c_ntype in self.ntypes: - num_nodes_dict[c_ntype] = + for c_ntype in g.ntypes: + num_nodes_dict[c_ntype] = \ g.number_of_nodes(c_ntype) + num if c_ntype == ntype else 0 graph_data = {} - for c_etype in self.canonical_etypes: + for c_etype in g.canonical_etypes: u, v = g.edges(form='uv', order='eid', etype=c_etype) graph_data[c_etype] = (u, v) @@ -787,32 +783,42 @@ def add_nodes(g, num, data=None, ntype=None): idtype = g.dtype, device = g.device) - for c_ntype in self.ntypes: + for c_ntype in g.ntypes: if c_ntype == ntype: ndata = g.nodes[c_ntype].data() # existing features - for k, v in ndata.items(): - if data.get(k, None) is not None: - new_g.nodes[c_ntype].data[k] = - F.cat([v, utils.prepare_tensor(new_g, data[k], 'data')], dim=0) + for key, val in ndata.items(): + if data is not None and data.get(key, None) is not None: + new_feats = utils.prepare_tensor(new_g, data[key], 'data') + assert len(new_feats) == num, \ + 'Data length of {} should be {}, but got {}'.format(key, + num, + len(new_feats)) + new_g.nodes[c_ntype].data[key] = F.cat([val, new_feats], dim=0) else: - new_g.nodes[c_ntype].data[k] = v + new_g.nodes[c_ntype].data[key] = val # non-existing features - for k, v in data: - if ndata.get(k, None) is None: - shape = F.shape(data[k]) - shape[0] = g.number_of_nodes(c_ntype) - new_g.nodes[c_ntype].data[k] = - F.cat([F.zeros(shape, F.dtype(data[k]), F.context(data[k])), - utils.prepare_tensor(new_g, data[k], 'data')], dim=0) + if data is not None: + for key, val in data: + if ndata.get(key, None) is None: + shape = F.shape(data[key]) + shape[0] = g.number_of_nodes(c_ntype) + new_feats = utils.prepare_tensor(new_g, data[key], 'data') + assert len(new_feats) == num, \ + 'Data length of {} should be {}, but got {}'.format(key, + num, + len(new_feats)) + new_g.nodes[c_ntype].data[key] = \ + F.cat([F.zeros(shape, F.dtype(data[key]), F.context(data[key])), + new_feats], dim=0) else: - for k, v in g.nodes[c_ntype].data: - new_g.nodes[c_ntype].data[k] = v + for key, val in g.nodes[c_ntype].data: + new_g.nodes[c_ntype].data[key] = val - for c_etype in self.canonical_etypes: - for k, v in g.edges[c_etype].data: - new_g.edges[c_etype].data[k] = v + for c_etype in g.canonical_etypes: + for key, val in g.edges[c_etype].data: + new_g.edges[c_etype].data[key] = val return new_g @@ -915,6 +921,105 @@ def add_edges(g, u, v, data=None, etype=None): remove_nodes remove_edges """ + # TODO(xiangsx): block do not support add_edges + u = utils.prepare_tensor(g, u, 'u') + v = utils.prepare_tensor(g, v, 'v') + + if etype is None: + if len(g.etypes) != 1: + raise DGLError('Edge type name must be specified if there are more than one ' + 'edge types.') + + assert len(u) > 0 and len(v) > 0, \ + 'The number of source nodes and the number of destination nodes should be larger than 0' + + assert len(u) == len(v) or len(u) == 1 or len(v) == 1, \ + 'The number of source nodes and the number of destination nodes should be same, ' \ + 'or either the number of source nodes or the number of destination nodes is 1.' + + # fill up u and v + if len(u) == 1 and len(v) > 1: + u = F.full_1d(len(v), F.as_scalar(u), dtype=F.dtype(u), ctx=F.context(u)) + if len(v) == 1 and len(u) > 1: + v = F.full_1d(len(u), F.as_scalar(v), dtype=F.dtype(v), ctx=F.context(v)) + + u_type, e_type, v_type = g.to_canonical_etype(etype) + # if end nodes of adding edges does not exists + # use add_nodes to add new nodes first. + num_of_u = g.number_of_nodes(u_type) + num_of_v = g.number_of_nodes(v_type) + u_max = F.as_scalar(F.max(u, dim=0)) + 1 + v_max = F.as_scalar(F.max(v, dim=0)) + 1 + + if u_type == v_type: + num_nodes = max(u_max, v_max) + if num_nodes > num_of_u: + g = add_nodes(g, num_nodes - num_of_u, ntype=u_type) + else: + if u_max > num_of_u: + g = add_nodes(g, u_max - num_of_u, ntype=u_type) + if v_max > num_of_v: + g = add_nodes(g, v_max - num_of_v, ntype=v_type) + + num_nodes_dict = {} + for c_ntype in g.ntypes: + num_nodes_dict[c_ntype] = \ + g.number_of_nodes(c_ntype) + num if c_ntype == ntype else 0 + + graph_data = {} + for c_etype in g.canonical_etypes: + old_u, old_v = g.edges(form='uv', order='eid', etype=c_etype) + if c_etype == (u_type, e_type, v_type) + graph_data[c_etype] = (F.cat([old_u, u], dim=0), + F.cat([old_v, v], dim=0)) + else: + graph_data[c_etype] = (u, v) + + new_g = dgl.heterograph(graph_data, + num_nodes_dict, + idtype = g.dtype, + device = g.device) + + # copy node features + for c_ntype in g.ntypes: + for key, val in g.nodes[c_ntype].data: + new_g.nodes[c_ntype].data[key] = val + + # copy edge features + for c_etype in g.canonical_etypes: + if c_etype == (u_type, e_type, v_type): + edata = g.edges[c_etype].data() + # existing features + for key, val in edata.items(): + if data is not None and data.get(key, None) is not None: + new_feats = utils.prepare_tensor(new_g, data[key], 'data') + assert len(new_feats) == len(u), \ + 'Data length of {} should be {}, but got {}'.format(key, + len(u), + len(new_feats)) + new_g.edges[c_etype].data[key] = F.cat([val, new_feats], dim=0) + else: + new_g.edges[c_etype].data[key] = val + + # non-existing features + if data is not None: + for key, val in data: + if edata.get(key, None) is None: + shape = F.shape(data[key]) + shape[0] = g.number_of_edges(c_etype) + new_feats = utils.prepare_tensor(new_g, data[key], 'data') + assert len(new_feats) == num, \ + 'Data length of {} should be {}, but got {}'.format(key, + len(u), + len(new_feats)) + new_g.edges[c_etype].data[key] = + F.cat([F.zeors(shape, F,dtype(data[key]), F.context(data[keeps])), + new_feats], dim=0) + else: + for key, val in g.edges[c_etype].data: + new_g.edges[c_etype].data[key] = val + + return new_g def remove_edges(g, eids, etype=None): r"""Remove multiple edges with the specified edge type. @@ -977,6 +1082,34 @@ def remove_edges(g, eids, etype=None): add_edges remove_nodes """ + # TODO(xiangsx): block do not support remove_edges + if etype is None: + if len(g.etypes) != 1: + raise DGLError('Edge type name must be specified if there are more than one ' \ + 'edge types.') + eids = utils.prepare_tensor(self, eids, 'u') + assert g.number_of_edges(etype) > F.as_scalar(F.max(eids, dim=0)), \ + 'The input eid {} is out of the range [0:{})'.format( + F.as_scalar(F.max(eids, dim=0)), g.number_of_edges(etype)) + + # edge_subgraph + edges = {} + etype = g.to_canonical_etype(etype) + for c_etype in g.canonical_etypes: + # the target edge type + if c_etype == etype: + old_eids = self.edges(form='eid', order='eid', etype=c_etype) + # trick here, eid_0 is 0 and should be handled + old_eids[0] += 1 + old_eids = F.scatter_row(old_eids, eids, F.full_1d( + len(eids), 0, F.dtype(old_eids), F.context(old_eids))) + edges[c_etype] = F.tensor(F.nonzero_1d(old_eids), dtype=F.dtype(old_eids)) + else: + edges[c_etype] = self.edges(form='eid', order='eid', etype=c_etype) + + sub_g = self.edge_subgraph(edges, preserve_nodes=True) + return sub_g + def remove_nodes(g, nids, ntype=None): r"""Remove multiple nodes with the specified node type. @@ -1048,8 +1181,34 @@ def remove_nodes(g, nids, ntype=None): add_edges remove_edges """ + # TODO(xiangsx): block do not support remove_nodes + if ntype is None: + if len(g.ntypes) != 1: + raise DGLError('Node type name must be specified if there are more than one ' \ + 'node types.') + + nids = utils.prepare_tensor(self, nids, 'u') + assert g.number_of_nodes(ntype) > F.as_scalar(F.max(nids, dim=0)), \ + 'The input nids {} is out of the range [0:{})'.format( + F.as_scalar(F.max(nids, dim=0)), g.number_of_nodes(ntype)) + + nodes = {} + for c_ntype in g.ntypes: + if c_ntype == ntype: + old_nids = self.nodes(c_ntype) + # trick here, nid_0 is 0 and should be handled + old_nids[0] += 1 + old_nids = F.scatter_row(old_nids, nids, F.full_1d( + len(nids), 0, F.dtype(old_nids), F.context(old_nids))) + nodes[c_ntype] = F.tensor(F.nonzero_1d(old_nids), dtype=F.dtype(old_nids)) + else: + nodes[c_ntype] = self.nodes(c_ntype) + + # node_subgraph + sub_g = self.subgraph(nodes) + return sub_g -def add_selfloop(g): +def add_self_loop(g, etype=None): r""" Add self loop for each node in the graph. A new graph with self-loop is returned. @@ -1064,8 +1223,8 @@ def add_selfloop(g): Notes ----- - * It is recommanded to ``remove_selfloop`` before invoking - ``add_selfloop``. + * It is recommanded to ``remove_self_loop`` before invoking + ``add_self_loop``. * Features for the new edges (self-loop edges) will be created by initializers defined with :func:`set_n_initializer` (default initializer fills zeros). @@ -1080,7 +1239,7 @@ def add_selfloop(g): >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0]))) >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1) >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1) - >>> g = dgl.add_selfloop(g) + >>> g = dgl.add_self_loop(g) >>> g Graph(num_nodes=3, num_edges=6, ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)} @@ -1100,20 +1259,36 @@ def add_selfloop(g): torch.tensor([0, 1])), ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([0, 1]))}) - >>> g = dgl.add_selfloop(g) + >>> g = dgl.add_self_loop(g, etype='follows') >>> g Graph(num_nodes={'user': 3, 'game': 2}, num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 5}, metagraph=[('user', 'user'), ('user', 'game')]) """ + etype = g.to_canonical_etype(etype) + if etype[0] != etype[2]: + raise DGLError( + 'add_self_loop does not support unidirectional bipartite graphs: {}.' \ + 'Please make sure the types of head node and tail node are identical.' \ + ''.format(etype)) + nodes = g.nodes(etype[0]) + new_g = add_edges(g, nodes, nodes, etype=etype) + return new_g -def remove_selfloop(g): + +def remove_self_loop(g, etype=None): r""" Remove self loops for each node in the graph. A new graph with self-loop removed is returned. If there are multiple self loops for a certain node, all of them will be removed. + Parameters + ---------- + etype : str or tuple of str, optional + The type of the edges to remove. Can be omitted if there is + only one edge type in the graph. + Examples --------- @@ -1125,7 +1300,7 @@ def remove_selfloop(g): >>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2])), idtype=idtype, device=F.ctx()) >>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1) - >>> g = dgl.remove_selfloop(g) + >>> g = dgl.remove_self_loop(g) >>> g Graph(num_nodes=3, num_edges=2, edata_schemes={'he': Scheme(shape=(2,), dtype=torch.float32)}) @@ -1140,7 +1315,7 @@ def remove_selfloop(g): >>> ('user', 'plays', 'game'): (torch.tensor([0, 1]), >>> torch.tensor([0, 1])) >>> }) - >>> g = dgl.remove_selfloop(g) + >>> g = dgl.remove_self_loop(g) >>> g.num_nodes('user') 3 >>> g.num_nodes('game') @@ -1152,8 +1327,18 @@ def remove_selfloop(g): See Also -------- - add_selfloop + add_self_loop """ + etype = g.to_canonical_etype(etype) + if etype[0] != etype[2]: + raise DGLError( + 'remove_self_loop does not support unidirectional bipartite graphs: {}.' \ + 'Please make sure the types of head node and tail node are identical.' \ + ''.format(etype)) + u, v = g.edges(form='uv', order='eid', etype=etype) + self_loop_eids = F.tensor(F.nonzero_1d(u == v), dtype=F.dtype(u)) + new_g = remove_edges(g, self_loop_eids, etype=etype) + return new_g def reorder_nodes(g, new_node_ids): diff --git a/tests/compute/test_transform.py b/tests/compute/test_transform.py index 6665df9ce807..4548c088ba9b 100644 --- a/tests/compute/test_transform.py +++ b/tests/compute/test_transform.py @@ -56,7 +56,7 @@ def test_hetero_linegraph(idtype): np.array([0, 1, 2, 4])) assert np.array_equal(F.asnumpy(col), np.array([4, 0, 3, 1])) - g = dgl.graph(([0, 1, 1, 2, 2],[2, 0, 2, 0, 1]), + g = dgl.graph(([0, 1, 1, 2, 2],[2, 0, 2, 0, 1]), 'user', 'follows', restrict_format='csr', idtype=idtype) lg = dgl.line_heterograph(g) assert lg.number_of_nodes() == 5 @@ -67,7 +67,7 @@ def test_hetero_linegraph(idtype): assert np.array_equal(F.asnumpy(col), np.array([3, 4, 0, 3, 4, 0, 1, 2])) - g = dgl.graph(([0, 1, 1, 2, 2],[2, 0, 2, 0, 1]), + g = dgl.graph(([0, 1, 1, 2, 2],[2, 0, 2, 0, 1]), 'user', 'follows', restrict_format='csc', idtype=idtype) lg = dgl.line_heterograph(g) assert lg.number_of_nodes() == 5 @@ -643,7 +643,7 @@ def _check(g, new_g, induced_nodes): g3, always_preserve=F.tensor([1, 7], idtype)) induced_nodes = {ntype: new_g3.nodes[ntype].data[dgl.NID] for ntype in new_g3.ntypes} induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()} - + assert new_g3.idtype == idtype assert set(induced_nodes['user']) == set([0, 1, 2, 7]) _check(g3, new_g3, induced_nodes) @@ -663,7 +663,7 @@ def _check(g, new_g, induced_nodes): new_g1, new_g2 = dgl.compact_graphs( [g1, g2], always_preserve={'game': F.tensor([4, 7], dtype=idtype)}) induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes} - induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()} + induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()} assert new_g1.idtype == idtype assert new_g2.idtype == idtype assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7, 8, 9]) @@ -676,7 +676,7 @@ def _check(g, new_g, induced_nodes): [g3, g4], always_preserve=F.tensor([1, 7], dtype=idtype)) induced_nodes = {ntype: new_g3.nodes[ntype].data[dgl.NID] for ntype in new_g3.ntypes} induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()} - + assert new_g3.idtype == idtype assert new_g4.idtype == idtype @@ -924,11 +924,67 @@ def _test_cast(): # disabled; prepare for DGLGraph/HeteroGraph merge assert F.array_equal(g2src, gsrc) assert F.array_equal(g2dst, gdst) +@parametrize_dtype +def test_add_edges(idtype): + pass + +@parametrize_dtype +def test_add_nodes(idtype): + # homogeneous Graphs + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + g.ndata['h'] = F.copy_to(F.tensor([1,1,1], dtype=idtype), ctx=F.ctx()) + new_g = dgl.add_nodes(g, 1) + assert g.number_of_nodes() == 3 + assert new_g.number_of_nodes() == 4 + assert F.array_equal(new_g.ndata['h'], F.tensor([1, 1, 1, 0], dtype=idtype)) + + # zero node graph + g = dgl.graph([], num_nodes=3, idtype=idtype, device=F.ctx()) + g.ndata['h'] = F.copy_to(F.tensor([1,1,1], dtype=idtype), ctx=F.ctx()) + g = dgl.add_nodes(g, 1, data={'h' : F.copy_to(F.tensor([2], dtype=idtype), ctx=F.ctx())}) + assert g.number_of_nodes() == 4 + assert F.array_equal(g.ndata['h'], F.tensor([1, 1, 1, 2], dtype=idtype)) + + # bipartite graph + g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + g = dgl.add_nodes(g, 2, data={'h' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())}, ntype='user') + assert g.number_of_nodes('user') == 4 + assert F.array_equal(g.nodes['user'].data['h'], F.tensor([0, 0, 2, 2], dtype=idtype)) + g = dgl.add_nodes(g, 2, ntype='game') + assert g.number_of_nodes('game') == 5 + + # heterogeneous graph + g = create_test_heterograph4(idtype) + g = dgl.add_nodes(g, 1, ntype='user') + g = dgl.add_nodes(g, 2, data={'h' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())}, ntype='game') + g = dgl.add_nodes(g, 0, ntype='developer') + assert g.number_of_nodes('user') == 4 + assert g.number_of_nodes('game') == 4 + assert g.number_of_nodes('developer') == 2 + assert F.array_equal(g.nodes['user'].data['h'], F.tensor([1, 1, 1, 0], dtype=idtype)) + assert F.array_equal(g.nodes['game'].data['h'], F.tensor([2, 2, 2, 2], dtype=idtype)) + +@parametrize_dtype +def test_remove_edges(idtype): + pass + +@parametrize_dtype +def test_remove_nodes(idtype): + pass + +@parametrize_dtype +def test_add_selfloop(idtype): + pass + +@parametrize_dtype +def test_remove_selfloop(idtype): + pass + if __name__ == '__main__': - test_reorder_nodes() + #test_reorder_nodes() # test_line_graph() # test_no_backtracking() - test_reverse() + #test_reverse() # test_reverse_shared_frames() # test_simple_graph() # test_bidirected_graph() @@ -941,8 +997,14 @@ def _test_cast(): # disabled; prepare for DGLGraph/HeteroGraph merge # test_metis_partition() # test_hetero_linegraph('int32') # test_compact() - test_to_simple("int32") + #test_to_simple("int32") # test_in_subgraph("int32") # test_out_subgraph() # test_to_block("int32") # test_remove_edges() + test_add_edges(F.int32) + test_add_nodes(F.int32) + test_remove_edges(F.int32) + test_remove_nodes(F.int32) + test_add_selfloop(F.int32) + test_remove_selfloop(F.int32) From 6f9ed5eb86130640b777cc43c951a8b7c903a11f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 20 Jul 2020 05:11:55 +0000 Subject: [PATCH 03/15] Fix --- python/dgl/heterograph.py | 24 +- python/dgl/transform.py | 153 +++++------ tests/compute/test_heterograph.py | 16 +- tests/compute/test_transform.py | 414 +++++++++++++++++++++++++++++- 4 files changed, 488 insertions(+), 119 deletions(-) diff --git a/python/dgl/heterograph.py b/python/dgl/heterograph.py index 79af8e73b433..4280df7cf030 100644 --- a/python/dgl/heterograph.py +++ b/python/dgl/heterograph.py @@ -803,7 +803,7 @@ def remove_nodes(self, nids, ntype=None): self._node_frames = sub_g._node_frames self._edge_frames = sub_g._edge_frames - def add_selfloop(self, etype=None): + def add_self_loop(self, etype=None): r""" Add self loop for each node in the graph. Parameters @@ -814,8 +814,8 @@ def add_selfloop(self, etype=None): Notes ----- - * It is recommanded to ``remove_selfloop`` before invoking - ``add_selfloop``. + * It is recommanded to ``remove_self_loop`` before invoking + ``add_self_loop``. * Inplace update is applied to the current graph. * Features for the new edges (self-loop edges) will be created by initializers defined with :func:`set_n_initializer` @@ -852,22 +852,26 @@ def add_selfloop(self, etype=None): torch.tensor([0, 1])), ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([0, 1]))}) - >>> g.add_selfloop(etype='follows') + >>> g.add_self_loop(etype='follows') >>> g Graph(num_nodes={'user': 3, 'game': 2}, num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 5}, metagraph=[('user', 'user'), ('user', 'game')]) + + See Also + -------- + remove_self_loop """ etype = self.to_canonical_etype(etype) if etype[0] != etype[2]: raise DGLError( - 'add_selfloop does not support unidirectional bipartite graphs: {}.' \ + 'add_self_loop does not support unidirectional bipartite graphs: {}.' \ 'Please make sure the types of head node and tail node are identical.' \ ''.format(etype)) nodes = self.nodes(etype[0]) self.add_edges(nodes, nodes, etype=etype) - def remove_selfloop(self, etype=None): + def remove_self_loop(self, etype=None): r""" Remove self loops for each node in the graph. If there are multiple self loops for a certain node, @@ -884,7 +888,7 @@ def remove_selfloop(self, etype=None): >>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2])), idtype=idtype, device=F.ctx()) >>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1) - >>> g.remove_selfloop() + >>> g.remove_self_loop() >>> g Graph(num_nodes=3, num_edges=2, edata_schemes={'he': Scheme(shape=(2,), dtype=torch.float32)}) @@ -899,7 +903,7 @@ def remove_selfloop(self, etype=None): >>> ('user', 'plays', 'game'): (torch.tensor([0, 1]), >>> torch.tensor([0, 1])) >>> }) - >>> g.remove_selfloop(etype='follows') + >>> g.remove_self_loop(etype='follows') >>> g.num_nodes('user') 3 >>> g.num_nodes('game') @@ -911,13 +915,13 @@ def remove_selfloop(self, etype=None): See Also -------- - add_selfloop + add_self_loop """ # TODO(xiangsx) need to handle block etype = self.to_canonical_etype(etype) if etype[0] != etype[2]: raise DGLError( - 'remove_selfloop does not support unidirectional bipartite graphs: {}.' \ + 'remove_self_loop does not support unidirectional bipartite graphs: {}.' \ 'Please make sure the types of head node and tail node are identical.' \ ''.format(etype)) u, v = self.edges(form='uv', order='eid', etype=etype) diff --git a/python/dgl/transform.py b/python/dgl/transform.py index 594c48eadd51..65c14ea8f3b4 100644 --- a/python/dgl/transform.py +++ b/python/dgl/transform.py @@ -12,7 +12,7 @@ from .graph_index import from_coo from .graph_index import _get_halo_subgraph_inner_node from .graph import unbatch -from .convert import graph, bipartite +from .convert import graph, bipartite, heterograph from . import utils from .base import EID, NID from . import ndarray as nd @@ -30,6 +30,10 @@ 'laplacian_lambda_max', 'knn_graph', 'segmented_knn_graph', + 'add_edges', + 'add_nodes', + 'remove_edges', + 'remove_nodes', 'add_self_loop', 'remove_self_loop', 'metapath_reachable_graph', @@ -38,7 +42,6 @@ 'to_simple', 'in_subgraph', 'out_subgraph', - 'remove_edges', 'as_immutable_graph', 'as_heterograph'] @@ -769,23 +772,27 @@ def add_nodes(g, num, data=None, ntype=None): assert num > 0, 'Number of new nodes should be larger than one.' num_nodes_dict = {} - for c_ntype in g.ntypes: - num_nodes_dict[c_ntype] = \ - g.number_of_nodes(c_ntype) + num if c_ntype == ntype else 0 - + if ntype is None: + num_nodes_dict[g.ntypes[0]] = g.number_of_nodes(g.ntypes[0]) + num + else: + for c_ntype in g.ntypes: + num_nodes_dict[c_ntype] = \ + g.number_of_nodes(c_ntype) + (num if c_ntype == ntype else 0) graph_data = {} for c_etype in g.canonical_etypes: u, v = g.edges(form='uv', order='eid', etype=c_etype) graph_data[c_etype] = (u, v) - new_g = dgl.heterograph(graph_data, - num_nodes_dict, - idtype = g.dtype, - device = g.device) + new_g = heterograph(graph_data, + num_nodes_dict, + idtype = g.idtype, + device = g.device) for c_ntype in g.ntypes: - if c_ntype == ntype: - ndata = g.nodes[c_ntype].data() + # ntype is None: only one ntype + # c_ntype == ntype: ntype to add nodes + if ntype is None or c_ntype == ntype: + ndata = g.nodes[c_ntype].data # existing features for key, val in ndata.items(): if data is not None and data.get(key, None) is not None: @@ -796,14 +803,17 @@ def add_nodes(g, num, data=None, ntype=None): len(new_feats)) new_g.nodes[c_ntype].data[key] = F.cat([val, new_feats], dim=0) else: - new_g.nodes[c_ntype].data[key] = val + shape = F.shape(val) + shape = (num,) + shape[1:] + new_feats = F.zeros(shape, F.dtype(val), F.context(val)) + new_g.nodes[c_ntype].data[key] = F.cat([val, new_feats], dim=0) # non-existing features if data is not None: - for key, val in data: + for key, val in data.items(): if ndata.get(key, None) is None: shape = F.shape(data[key]) - shape[0] = g.number_of_nodes(c_ntype) + shape = (g.number_of_nodes(c_ntype),) + shape[1:] new_feats = utils.prepare_tensor(new_g, data[key], 'data') assert len(new_feats) == num, \ 'Data length of {} should be {}, but got {}'.format(key, @@ -813,11 +823,11 @@ def add_nodes(g, num, data=None, ntype=None): F.cat([F.zeros(shape, F.dtype(data[key]), F.context(data[key])), new_feats], dim=0) else: - for key, val in g.nodes[c_ntype].data: + for key, val in g.nodes[c_ntype].data.items(): new_g.nodes[c_ntype].data[key] = val for c_etype in g.canonical_etypes: - for key, val in g.edges[c_etype].data: + for key, val in g.edges[c_etype].data.items(): new_g.edges[c_etype].data[key] = val return new_g @@ -940,8 +950,8 @@ def add_edges(g, u, v, data=None, etype=None): # fill up u and v if len(u) == 1 and len(v) > 1: u = F.full_1d(len(v), F.as_scalar(u), dtype=F.dtype(u), ctx=F.context(u)) - if len(v) == 1 and len(u) > 1: - v = F.full_1d(len(u), F.as_scalar(v), dtype=F.dtype(v), ctx=F.context(v)) + if len(v) == 1 and len(u) > 1: + v = F.full_1d(len(u), F.as_scalar(v), dtype=F.dtype(v), ctx=F.context(v)) u_type, e_type, v_type = g.to_canonical_etype(etype) # if end nodes of adding edges does not exists @@ -963,32 +973,31 @@ def add_edges(g, u, v, data=None, etype=None): num_nodes_dict = {} for c_ntype in g.ntypes: - num_nodes_dict[c_ntype] = \ - g.number_of_nodes(c_ntype) + num if c_ntype == ntype else 0 + num_nodes_dict[c_ntype] = g.number_of_nodes(c_ntype) graph_data = {} for c_etype in g.canonical_etypes: old_u, old_v = g.edges(form='uv', order='eid', etype=c_etype) - if c_etype == (u_type, e_type, v_type) + if c_etype == (u_type, e_type, v_type): graph_data[c_etype] = (F.cat([old_u, u], dim=0), F.cat([old_v, v], dim=0)) else: - graph_data[c_etype] = (u, v) + graph_data[c_etype] = (old_u, old_v) - new_g = dgl.heterograph(graph_data, - num_nodes_dict, - idtype = g.dtype, - device = g.device) + new_g = heterograph(graph_data, + num_nodes_dict, + idtype = g.idtype, + device = g.device) # copy node features for c_ntype in g.ntypes: - for key, val in g.nodes[c_ntype].data: + for key, val in g.nodes[c_ntype].data.items(): new_g.nodes[c_ntype].data[key] = val # copy edge features for c_etype in g.canonical_etypes: if c_etype == (u_type, e_type, v_type): - edata = g.edges[c_etype].data() + edata = g.edges[c_etype].data # existing features for key, val in edata.items(): if data is not None and data.get(key, None) is not None: @@ -999,24 +1008,27 @@ def add_edges(g, u, v, data=None, etype=None): len(new_feats)) new_g.edges[c_etype].data[key] = F.cat([val, new_feats], dim=0) else: - new_g.edges[c_etype].data[key] = val + shape = F.shape(val) + shape = (len(u),) + shape[1:] + new_feats = F.zeros(shape, F.dtype(val), F.context(val)) + new_g.edges[c_etype].data[key] = F.cat([val, new_feats], dim=0) # non-existing features if data is not None: - for key, val in data: + for key, val in data.items(): if edata.get(key, None) is None: shape = F.shape(data[key]) - shape[0] = g.number_of_edges(c_etype) + shape = (g.number_of_edges(c_etype),) + shape[1:] new_feats = utils.prepare_tensor(new_g, data[key], 'data') - assert len(new_feats) == num, \ + assert len(new_feats) == len(u), \ 'Data length of {} should be {}, but got {}'.format(key, len(u), len(new_feats)) - new_g.edges[c_etype].data[key] = - F.cat([F.zeors(shape, F,dtype(data[key]), F.context(data[keeps])), + new_g.edges[c_etype].data[key] = \ + F.cat([F.zeros(shape, F.dtype(val), F.context(val)), new_feats], dim=0) else: - for key, val in g.edges[c_etype].data: + for key, val in g.edges[c_etype].data.items(): new_g.edges[c_etype].data[key] = val return new_g @@ -1087,7 +1099,7 @@ def remove_edges(g, eids, etype=None): if len(g.etypes) != 1: raise DGLError('Edge type name must be specified if there are more than one ' \ 'edge types.') - eids = utils.prepare_tensor(self, eids, 'u') + eids = utils.prepare_tensor(g, eids, 'u') assert g.number_of_edges(etype) > F.as_scalar(F.max(eids, dim=0)), \ 'The input eid {} is out of the range [0:{})'.format( F.as_scalar(F.max(eids, dim=0)), g.number_of_edges(etype)) @@ -1098,16 +1110,16 @@ def remove_edges(g, eids, etype=None): for c_etype in g.canonical_etypes: # the target edge type if c_etype == etype: - old_eids = self.edges(form='eid', order='eid', etype=c_etype) + old_eids = g.edges(form='eid', order='eid', etype=c_etype) # trick here, eid_0 is 0 and should be handled old_eids[0] += 1 old_eids = F.scatter_row(old_eids, eids, F.full_1d( len(eids), 0, F.dtype(old_eids), F.context(old_eids))) edges[c_etype] = F.tensor(F.nonzero_1d(old_eids), dtype=F.dtype(old_eids)) else: - edges[c_etype] = self.edges(form='eid', order='eid', etype=c_etype) + edges[c_etype] = g.edges(form='eid', order='eid', etype=c_etype) - sub_g = self.edge_subgraph(edges, preserve_nodes=True) + sub_g = g.edge_subgraph(edges, preserve_nodes=True) return sub_g @@ -1187,25 +1199,27 @@ def remove_nodes(g, nids, ntype=None): raise DGLError('Node type name must be specified if there are more than one ' \ 'node types.') - nids = utils.prepare_tensor(self, nids, 'u') + nids = utils.prepare_tensor(g, nids, 'u') assert g.number_of_nodes(ntype) > F.as_scalar(F.max(nids, dim=0)), \ 'The input nids {} is out of the range [0:{})'.format( F.as_scalar(F.max(nids, dim=0)), g.number_of_nodes(ntype)) nodes = {} for c_ntype in g.ntypes: - if c_ntype == ntype: - old_nids = self.nodes(c_ntype) + # ntype is None: only one ntype + # c_ntype == ntype: ntype to add nodes + if ntype is None or c_ntype == ntype: + old_nids = g.nodes(c_ntype) # trick here, nid_0 is 0 and should be handled old_nids[0] += 1 old_nids = F.scatter_row(old_nids, nids, F.full_1d( len(nids), 0, F.dtype(old_nids), F.context(old_nids))) nodes[c_ntype] = F.tensor(F.nonzero_1d(old_nids), dtype=F.dtype(old_nids)) else: - nodes[c_ntype] = self.nodes(c_ntype) + nodes[c_ntype] = g.nodes(c_ntype) # node_subgraph - sub_g = self.subgraph(nodes) + sub_g = g.subgraph(nodes) return sub_g def add_self_loop(g, etype=None): @@ -1870,55 +1884,6 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True): return new_graph -def remove_edges(g, edge_ids): - """Return a new graph with given edge IDs removed. - - The nodes are preserved. - - Parameters - ---------- - graph : DGLHeteroGraph - The graph - edge_ids : Tensor or dict[etypes, Tensor] - The edge IDs for each edge type. - - Returns - ------- - DGLHeteroGraph - The new graph. - The edge ID mapping from the new graph to the original graph is stored as - ``dgl.EID`` on edge features. - """ - if not isinstance(edge_ids, Mapping): - if len(g.etypes) != 1: - raise ValueError( - "Graph has more than one edge type; specify a dict for edge_id instead.") - edge_ids = {g.canonical_etypes[0]: edge_ids} - - edge_ids_nd = [nd.NULL[g._idtype_str]] * len(g.etypes) - for key, value in edge_ids.items(): - if value.dtype != g.idtype: - # if didn't check, this function still works, but returns wrong result - raise utils.InconsistentDtypeException("Expect edge id tensors({}) to have \ - the same index type as graph({})".format(value.dtype, g.idtype)) - edge_ids_nd[g.get_etype_id(key)] = F.zerocopy_to_dgl_ndarray(value) - new_graph_index, induced_eids_nd = _CAPI_DGLRemoveEdges(g._graph, edge_ids_nd) - - new_graph = DGLHeteroGraph(new_graph_index, g.ntypes, g.etypes) - for i, canonical_etype in enumerate(g.canonical_etypes): - data = induced_eids_nd[i] - if len(data) == 0: - # Empty means that either - # (1) no edges are removed and edges are not shuffled. - # (2) all edges are removed. - # The following statement deals with both cases. - new_graph.edges[canonical_etype].data[EID] = F.arange( - 0, new_graph.number_of_edges(canonical_etype)) - else: - new_graph.edges[canonical_etype].data[EID] = F.zerocopy_from_dgl_ndarray(data) - - return new_graph - def in_subgraph(g, nodes): """Extract the subgraph containing only the in edges of the given nodes. diff --git a/tests/compute/test_heterograph.py b/tests/compute/test_heterograph.py index 267811e22b57..4ccd11f10f47 100644 --- a/tests/compute/test_heterograph.py +++ b/tests/compute/test_heterograph.py @@ -2400,7 +2400,7 @@ def test_add_selfloop(idtype): g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx()) g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) - g.add_selfloop() + g.add_self_loop() assert g.number_of_nodes() == 3 assert g.number_of_edges() == 6 u, v = g.edges(form='uv', order='eid') @@ -2413,13 +2413,13 @@ def test_add_selfloop(idtype): # nothing will happend raise_error = False try: - g.add_selfloop() + g.add_self_loop() except: raise_error = True assert raise_error g = create_test_heterograph6(idtype) - g.add_selfloop(etype='follows') + g.add_self_loop(etype='follows') assert g.number_of_nodes('user') == 3 assert g.number_of_nodes('game') == 2 assert g.number_of_edges('follows') == 5 @@ -2432,7 +2432,7 @@ def test_add_selfloop(idtype): raise_error = False try: - g.add_selfloop(etype='plays') + g.add_self_loop(etype='plays') except: raise_error = True assert raise_error @@ -2442,7 +2442,7 @@ def test_remove_selfloop(idtype): # homogeneous graph g = dgl.graph(([0, 0, 0, 1], [1, 0, 0, 2]), idtype=idtype, device=F.ctx()) g.edata['he'] = F.copy_to(F.tensor([1, 2, 3, 4], dtype=idtype), ctx=F.ctx()) - g.remove_selfloop() + g.remove_self_loop() assert g.number_of_nodes() == 3 assert g.number_of_edges() == 2 assert F.array_equal(g.edata['he'], F.tensor([1, 4], dtype=idtype)) @@ -2452,13 +2452,13 @@ def test_remove_selfloop(idtype): # nothing will happend raise_error = False try: - g.remove_selfloop(etype='plays') + g.remove_self_loop(etype='plays') except: raise_error = True assert raise_error g = create_test_heterograph5(idtype) - g.remove_selfloop(etype='follows') + g.remove_self_loop(etype='follows') assert g.number_of_nodes('user') == 3 assert g.number_of_nodes('game') == 2 assert g.number_of_edges('follows') == 2 @@ -2471,7 +2471,7 @@ def test_remove_selfloop(idtype): raise_error = False try: - g.remove_selfloop(etype='plays') + g.remove_self_loop(etype='plays') except: raise_error = True assert raise_error diff --git a/tests/compute/test_transform.py b/tests/compute/test_transform.py index 4548c088ba9b..d64d4407f828 100644 --- a/tests/compute/test_transform.py +++ b/tests/compute/test_transform.py @@ -8,6 +8,8 @@ import unittest from utils import parametrize_dtype +from test_heterograph import create_test_heterograph4, create_test_heterograph5, create_test_heterograph6 + D = 5 # line graph related @@ -926,7 +928,154 @@ def _test_cast(): # disabled; prepare for DGLGraph/HeteroGraph merge @parametrize_dtype def test_add_edges(idtype): - pass + # homogeneous graph + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + u = 0 + v = 1 + g = dgl.add_edges(g, u, v) + assert g.device == F.ctx() + assert g.number_of_nodes() == 3 + assert g.number_of_edges() == 3 + u = [0] + v = [1] + g = dgl.add_edges(g, u, v) + assert g.device == F.ctx() + assert g.number_of_nodes() == 3 + assert g.number_of_edges() == 4 + u = F.tensor(u, dtype=idtype) + v = F.tensor(v, dtype=idtype) + g = dgl.add_edges(g, u, v) + assert g.device == F.ctx() + assert g.number_of_nodes() == 3 + assert g.number_of_edges() == 5 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0, 1, 0, 0, 0], dtype=idtype)) + assert F.array_equal(v, F.tensor([1, 2, 1, 1, 1], dtype=idtype)) + + # node id larger than current max node id + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + u = F.tensor([0, 1], dtype=idtype) + v = F.tensor([2, 3], dtype=idtype) + g = dgl.add_edges(g, u, v) + assert g.number_of_nodes() == 4 + assert g.number_of_edges() == 4 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0, 1, 0, 1], dtype=idtype)) + assert F.array_equal(v, F.tensor([1, 2, 2, 3], dtype=idtype)) + + # has data + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + g.ndata['h'] = F.copy_to(F.tensor([1, 1, 1], dtype=idtype), ctx=F.ctx()) + g.edata['h'] = F.copy_to(F.tensor([1, 1], dtype=idtype), ctx=F.ctx()) + u = F.tensor([0, 1], dtype=idtype) + v = F.tensor([2, 3], dtype=idtype) + e_feat = {'h' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx()), + 'hh' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())} + g = dgl.add_edges(g, u, v, e_feat) + assert g.number_of_nodes() == 4 + assert g.number_of_edges() == 4 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0, 1, 0, 1], dtype=idtype)) + assert F.array_equal(v, F.tensor([1, 2, 2, 3], dtype=idtype)) + assert F.array_equal(g.ndata['h'], F.tensor([1, 1, 1, 0], dtype=idtype)) + assert F.array_equal(g.edata['h'], F.tensor([1, 1, 2, 2], dtype=idtype)) + assert F.array_equal(g.edata['hh'], F.tensor([0, 0, 2, 2], dtype=idtype)) + + # bipartite graph + g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + u = 0 + v = 1 + g = dgl.add_edges(g, u, v) + assert g.device == F.ctx() + assert g.number_of_nodes('user') == 2 + assert g.number_of_nodes('game') == 3 + assert g.number_of_edges() == 3 + u = [0] + v = [1] + g = dgl.add_edges(g, u, v) + assert g.device == F.ctx() + assert g.number_of_nodes('user') == 2 + assert g.number_of_nodes('game') == 3 + assert g.number_of_edges() == 4 + u = F.tensor(u, dtype=idtype) + v = F.tensor(v, dtype=idtype) + g = dgl.add_edges(g, u, v) + assert g.device == F.ctx() + assert g.number_of_nodes('user') == 2 + assert g.number_of_nodes('game') == 3 + assert g.number_of_edges() == 5 + u, v = g.edges(form='uv') + assert F.array_equal(u, F.tensor([0, 1, 0, 0, 0], dtype=idtype)) + assert F.array_equal(v, F.tensor([1, 2, 1, 1, 1], dtype=idtype)) + + # node id larger than current max node id + g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + u = F.tensor([0, 2], dtype=idtype) + v = F.tensor([2, 3], dtype=idtype) + g = dgl.add_edges(g, u, v) + assert g.device == F.ctx() + assert g.number_of_nodes('user') == 3 + assert g.number_of_nodes('game') == 4 + assert g.number_of_edges() == 4 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0, 1, 0, 2], dtype=idtype)) + assert F.array_equal(v, F.tensor([1, 2, 2, 3], dtype=idtype)) + + # has data + g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + g.ndata['h'] = {'user' : F.copy_to(F.tensor([1, 1], dtype=idtype), ctx=F.ctx()), + 'game' : F.copy_to(F.tensor([2, 2, 2], dtype=idtype), ctx=F.ctx())} + g.edata['h'] = F.copy_to(F.tensor([1, 1], dtype=idtype), ctx=F.ctx()) + u = F.tensor([0, 2], dtype=idtype) + v = F.tensor([2, 3], dtype=idtype) + e_feat = {'h' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx()), + 'hh' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())} + g = dgl.add_edges(g, u, v, e_feat) + assert g.number_of_nodes('user') == 3 + assert g.number_of_nodes('game') == 4 + assert g.number_of_edges() == 4 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0, 1, 0, 2], dtype=idtype)) + assert F.array_equal(v, F.tensor([1, 2, 2, 3], dtype=idtype)) + assert F.array_equal(g.nodes['user'].data['h'], F.tensor([1, 1, 0], dtype=idtype)) + assert F.array_equal(g.nodes['game'].data['h'], F.tensor([2, 2, 2, 0], dtype=idtype)) + assert F.array_equal(g.edata['h'], F.tensor([1, 1, 2, 2], dtype=idtype)) + assert F.array_equal(g.edata['hh'], F.tensor([0, 0, 2, 2], dtype=idtype)) + + # heterogeneous graph + g = create_test_heterograph4(idtype) + u = F.tensor([0, 2], dtype=idtype) + v = F.tensor([2, 3], dtype=idtype) + g = dgl.add_edges(g, u, v, etype='plays') + assert g.number_of_nodes('user') == 3 + assert g.number_of_nodes('game') == 4 + assert g.number_of_nodes('developer') == 2 + assert g.number_of_edges('plays') == 6 + assert g.number_of_edges('develops') == 2 + u, v = g.edges(form='uv', order='eid', etype='plays') + assert F.array_equal(u, F.tensor([0, 1, 1, 2, 0, 2], dtype=idtype)) + assert F.array_equal(v, F.tensor([0, 0, 1, 1, 2, 3], dtype=idtype)) + assert F.array_equal(g.nodes['user'].data['h'], F.tensor([1, 1, 1], dtype=idtype)) + assert F.array_equal(g.nodes['game'].data['h'], F.tensor([2, 2, 0, 0], dtype=idtype)) + assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 1, 1, 1, 0, 0], dtype=idtype)) + + # add with feature + e_feat = {'h': F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())} + u = F.tensor([0, 2], dtype=idtype) + v = F.tensor([2, 3], dtype=idtype) + g.nodes['game'].data['h'] = F.copy_to(F.tensor([2, 2, 1, 1], dtype=idtype), ctx=F.ctx()) + g = dgl.add_edges(g, u, v, data=e_feat, etype='develops') + assert g.number_of_nodes('user') == 3 + assert g.number_of_nodes('game') == 4 + assert g.number_of_nodes('developer') == 3 + assert g.number_of_edges('plays') == 6 + assert g.number_of_edges('develops') == 4 + u, v = g.edges(form='uv', order='eid', etype='develops') + assert F.array_equal(u, F.tensor([0, 1, 0, 2], dtype=idtype)) + assert F.array_equal(v, F.tensor([0, 1, 2, 3], dtype=idtype)) + assert F.array_equal(g.nodes['developer'].data['h'], F.tensor([3, 3, 0], dtype=idtype)) + assert F.array_equal(g.nodes['game'].data['h'], F.tensor([2, 2, 1, 1], dtype=idtype)) + assert F.array_equal(g.edges['develops'].data['h'], F.tensor([0, 0, 2, 2], dtype=idtype)) @parametrize_dtype def test_add_nodes(idtype): @@ -949,15 +1098,16 @@ def test_add_nodes(idtype): g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) g = dgl.add_nodes(g, 2, data={'h' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())}, ntype='user') assert g.number_of_nodes('user') == 4 + assert g.number_of_nodes('game') == 3 assert F.array_equal(g.nodes['user'].data['h'], F.tensor([0, 0, 2, 2], dtype=idtype)) g = dgl.add_nodes(g, 2, ntype='game') + assert g.number_of_nodes('user') == 4 assert g.number_of_nodes('game') == 5 # heterogeneous graph g = create_test_heterograph4(idtype) g = dgl.add_nodes(g, 1, ntype='user') g = dgl.add_nodes(g, 2, data={'h' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())}, ntype='game') - g = dgl.add_nodes(g, 0, ntype='developer') assert g.number_of_nodes('user') == 4 assert g.number_of_nodes('game') == 4 assert g.number_of_nodes('developer') == 2 @@ -966,19 +1116,269 @@ def test_add_nodes(idtype): @parametrize_dtype def test_remove_edges(idtype): - pass + # homogeneous Graphs + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + e = 0 + g = dgl.remove_edges(g, e) + assert g.number_of_edges() == 1 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([1], dtype=idtype)) + assert F.array_equal(v, F.tensor([2], dtype=idtype)) + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + e = [0] + g = dgl.remove_edges(g, e) + assert g.number_of_edges() == 1 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([1], dtype=idtype)) + assert F.array_equal(v, F.tensor([2], dtype=idtype)) + e = F.tensor([0], dtype=idtype) + g = dgl.remove_edges(g, e) + assert g.number_of_edges() == 0 + + # has node data + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + g.ndata['h'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) + g = dgl.remove_edges(g, 1) + assert g.number_of_edges() == 1 + assert F.array_equal(g.ndata['h'], F.tensor([1, 2, 3], dtype=idtype)) + + # has edge data + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + g.edata['h'] = F.copy_to(F.tensor([1, 2], dtype=idtype), ctx=F.ctx()) + g = dgl.remove_edges(g, 0) + assert g.number_of_edges() == 1 + assert F.array_equal(g.edata['h'], F.tensor([2], dtype=idtype)) + + # invalid eid + assert_fail = False + try: + g = dgl.remove_edges(g, 1) + except: + assert_fail = True + assert assert_fail + + # bipartite graph + g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + e = 0 + g = dgl.remove_edges(g, e) + assert g.number_of_edges() == 1 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([1], dtype=idtype)) + assert F.array_equal(v, F.tensor([2], dtype=idtype)) + g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + e = [0] + g = dgl.remove_edges(g, e) + assert g.number_of_edges() == 1 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([1], dtype=idtype)) + assert F.array_equal(v, F.tensor([2], dtype=idtype)) + e = F.tensor([0], dtype=idtype) + g = dgl.remove_edges(g, e) + assert g.number_of_edges() == 0 + + # has data + g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + g.ndata['h'] = {'user' : F.copy_to(F.tensor([1, 1], dtype=idtype), ctx=F.ctx()), + 'game' : F.copy_to(F.tensor([2, 2, 2], dtype=idtype), ctx=F.ctx())} + g.edata['h'] = F.copy_to(F.tensor([1, 2], dtype=idtype), ctx=F.ctx()) + g = dgl.remove_edges(g, 1) + assert g.number_of_edges() == 1 + assert F.array_equal(g.nodes['user'].data['h'], F.tensor([1, 1], dtype=idtype)) + assert F.array_equal(g.nodes['game'].data['h'], F.tensor([2, 2, 2], dtype=idtype)) + assert F.array_equal(g.edata['h'], F.tensor([1], dtype=idtype)) + + # heterogeneous graph + g = create_test_heterograph4(idtype) + g.edges['plays'].data['h'] = F.copy_to(F.tensor([1, 2, 3, 4], dtype=idtype), ctx=F.ctx()) + g = dgl.remove_edges(g, 1, etype='plays') + assert g.number_of_edges('plays') == 3 + u, v = g.edges(form='uv', order='eid', etype='plays') + assert F.array_equal(u, F.tensor([0, 1, 2], dtype=idtype)) + assert F.array_equal(v, F.tensor([0, 1, 1], dtype=idtype)) + assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 3, 4], dtype=idtype)) + # remove all edges of 'develops' + g = dgl.remove_edges(g, [0, 1], etype='develops') + assert g.number_of_edges('develops') == 0 + assert F.array_equal(g.nodes['user'].data['h'], F.tensor([1, 1, 1], dtype=idtype)) + assert F.array_equal(g.nodes['game'].data['h'], F.tensor([2, 2], dtype=idtype)) + assert F.array_equal(g.nodes['developer'].data['h'], F.tensor([3, 3], dtype=idtype)) @parametrize_dtype def test_remove_nodes(idtype): - pass + # homogeneous Graphs + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + n = 0 + g = dgl.remove_nodes(g, n) + assert g.number_of_nodes() == 2 + assert g.number_of_edges() == 1 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0], dtype=idtype)) + assert F.array_equal(v, F.tensor([1], dtype=idtype)) + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + n = [1] + g = dgl.remove_nodes(g, n) + assert g.number_of_nodes() == 2 + assert g.number_of_edges() == 0 + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + n = F.tensor([2], dtype=idtype) + g = dgl.remove_nodes(g, n) + assert g.number_of_nodes() == 2 + assert g.number_of_edges() == 1 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0], dtype=idtype)) + assert F.array_equal(v, F.tensor([1], dtype=idtype)) + + # invalid nid + assert_fail = False + try: + g.remove_nodes(3) + except: + assert_fail = True + assert assert_fail + + # has node and edge data + g = dgl.graph(([0, 0, 2], [0, 1, 2]), idtype=idtype, device=F.ctx()) + g.ndata['hv'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) + g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) + g = dgl.remove_nodes(g, F.tensor([0], dtype=idtype)) + assert g.number_of_nodes() == 2 + assert g.number_of_edges() == 1 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([1], dtype=idtype)) + assert F.array_equal(v, F.tensor([1], dtype=idtype)) + assert F.array_equal(g.ndata['hv'], F.tensor([2, 3], dtype=idtype)) + assert F.array_equal(g.edata['he'], F.tensor([3], dtype=idtype)) + + # node id larger than current max node id + g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + n = 0 + g = dgl.remove_nodes(g, n, ntype='user') + assert g.number_of_nodes('user') == 1 + assert g.number_of_nodes('game') == 3 + assert g.number_of_edges() == 1 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0], dtype=idtype)) + assert F.array_equal(v, F.tensor([2], dtype=idtype)) + g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + n = [1] + g = dgl.remove_nodes(g, n, ntype='user') + assert g.number_of_nodes('user') == 1 + assert g.number_of_nodes('game') == 3 + assert g.number_of_edges() == 1 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0], dtype=idtype)) + assert F.array_equal(v, F.tensor([1], dtype=idtype)) + g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + n = F.tensor([0], dtype=idtype) + g = dgl.remove_nodes(g, n, ntype='game') + assert g.number_of_nodes('user') == 2 + assert g.number_of_nodes('game') == 2 + assert g.number_of_edges() == 2 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0, 1], dtype=idtype)) + assert F.array_equal(v, F.tensor([0 ,1], dtype=idtype)) + + # heterogeneous graph + g = create_test_heterograph4(idtype) + g.edges['plays'].data['h'] = F.copy_to(F.tensor([1, 2, 3, 4], dtype=idtype), ctx=F.ctx()) + g = dgl.remove_nodes(g, 0, ntype='game') + assert g.number_of_nodes('user') == 3 + assert g.number_of_nodes('game') == 1 + assert g.number_of_nodes('developer') == 2 + assert g.number_of_edges('plays') == 2 + assert g.number_of_edges('develops') == 1 + assert F.array_equal(g.nodes['user'].data['h'], F.tensor([1, 1, 1], dtype=idtype)) + assert F.array_equal(g.nodes['game'].data['h'], F.tensor([2], dtype=idtype)) + assert F.array_equal(g.nodes['developer'].data['h'], F.tensor([3, 3], dtype=idtype)) + u, v = g.edges(form='uv', order='eid', etype='plays') + assert F.array_equal(u, F.tensor([1, 2], dtype=idtype)) + assert F.array_equal(v, F.tensor([0, 0], dtype=idtype)) + assert F.array_equal(g.edges['plays'].data['h'], F.tensor([3, 4], dtype=idtype)) + u, v = g.edges(form='uv', order='eid', etype='develops') + assert F.array_equal(u, F.tensor([1], dtype=idtype)) + assert F.array_equal(v, F.tensor([0], dtype=idtype)) @parametrize_dtype def test_add_selfloop(idtype): - pass + # homogeneous graph + g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx()) + g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) + g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) + g = dgl.add_self_loop(g) + assert g.number_of_nodes() == 3 + assert g.number_of_edges() == 6 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype)) + assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype)) + assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 0, 0, 0], dtype=idtype)) + + # bipartite graph + g = dgl.bipartite(([0, 1, 2], [1, 2, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + # nothing will happend + raise_error = False + try: + g = dgl.add_self_loop(g) + except: + raise_error = True + assert raise_error + + g = create_test_heterograph6(idtype) + g = dgl.add_self_loop(g, etype='follows') + assert g.number_of_nodes('user') == 3 + assert g.number_of_nodes('game') == 2 + assert g.number_of_edges('follows') == 5 + assert g.number_of_edges('plays') == 2 + u, v = g.edges(form='uv', order='eid', etype='follows') + assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype)) + assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype)) + assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 0, 0, 0], dtype=idtype)) + assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype)) + + raise_error = False + try: + g = dgl.add_self_loop(g, etype='plays') + except: + raise_error = True + assert raise_error @parametrize_dtype def test_remove_selfloop(idtype): - pass + # homogeneous graph + g = dgl.graph(([0, 0, 0, 1], [1, 0, 0, 2]), idtype=idtype, device=F.ctx()) + g.edata['he'] = F.copy_to(F.tensor([1, 2, 3, 4], dtype=idtype), ctx=F.ctx()) + g = dgl.remove_self_loop(g) + assert g.number_of_nodes() == 3 + assert g.number_of_edges() == 2 + assert F.array_equal(g.edata['he'], F.tensor([1, 4], dtype=idtype)) + + # bipartite graph + g = dgl.bipartite(([0, 1, 2], [1, 2, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) + # nothing will happend + raise_error = False + try: + g = dgl.remove_self_loop(g, etype='plays') + except: + raise_error = True + assert raise_error + + g = create_test_heterograph5(idtype) + g = dgl.remove_self_loop(g, etype='follows') + assert g.number_of_nodes('user') == 3 + assert g.number_of_nodes('game') == 2 + assert g.number_of_edges('follows') == 2 + assert g.number_of_edges('plays') == 2 + u, v = g.edges(form='uv', order='eid', etype='follows') + assert F.array_equal(u, F.tensor([1, 2], dtype=idtype)) + assert F.array_equal(v, F.tensor([0, 1], dtype=idtype)) + assert F.array_equal(g.edges['follows'].data['h'], F.tensor([2, 4], dtype=idtype)) + assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype)) + + raise_error = False + try: + g = dgl.remove_self_loop(g, etype='plays') + except: + raise_error = True + assert raise_error if __name__ == '__main__': #test_reorder_nodes() @@ -1002,8 +1402,8 @@ def test_remove_selfloop(idtype): # test_out_subgraph() # test_to_block("int32") # test_remove_edges() - test_add_edges(F.int32) test_add_nodes(F.int32) + test_add_edges(F.int32) test_remove_edges(F.int32) test_remove_nodes(F.int32) test_add_selfloop(F.int32) From 23c7ef4cde6a7f961a25e761108727f860814b7c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 20 Jul 2020 05:40:20 +0000 Subject: [PATCH 04/15] lint --- python/dgl/transform.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/python/dgl/transform.py b/python/dgl/transform.py index 65c14ea8f3b4..d56708d6c64f 100644 --- a/python/dgl/transform.py +++ b/python/dgl/transform.py @@ -767,7 +767,7 @@ def add_nodes(g, num, data=None, ntype=None): if ntype is None: if len(g.ntypes) != 1: raise DGLError('Node type name must be specified if there are more than one ' - 'node types.') + 'node types.') assert num > 0, 'Number of new nodes should be larger than one.' @@ -785,8 +785,8 @@ def add_nodes(g, num, data=None, ntype=None): new_g = heterograph(graph_data, num_nodes_dict, - idtype = g.idtype, - device = g.device) + idtype=g.idtype, + device=g.device) for c_ntype in g.ntypes: # ntype is None: only one ntype @@ -938,7 +938,7 @@ def add_edges(g, u, v, data=None, etype=None): if etype is None: if len(g.etypes) != 1: raise DGLError('Edge type name must be specified if there are more than one ' - 'edge types.') + 'edge types.') assert len(u) > 0 and len(v) > 0, \ 'The number of source nodes and the number of destination nodes should be larger than 0' @@ -949,7 +949,7 @@ def add_edges(g, u, v, data=None, etype=None): # fill up u and v if len(u) == 1 and len(v) > 1: - u = F.full_1d(len(v), F.as_scalar(u), dtype=F.dtype(u), ctx=F.context(u)) + u = F.full_1d(len(v), F.as_scalar(u), dtype=F.dtype(u), ctx=F.context(u)) if len(v) == 1 and len(u) > 1: v = F.full_1d(len(u), F.as_scalar(v), dtype=F.dtype(v), ctx=F.context(v)) @@ -986,8 +986,8 @@ def add_edges(g, u, v, data=None, etype=None): new_g = heterograph(graph_data, num_nodes_dict, - idtype = g.idtype, - device = g.device) + idtype=g.idtype, + device=g.device) # copy node features for c_ntype in g.ntypes: @@ -1345,10 +1345,10 @@ def remove_self_loop(g, etype=None): """ etype = g.to_canonical_etype(etype) if etype[0] != etype[2]: - raise DGLError( - 'remove_self_loop does not support unidirectional bipartite graphs: {}.' \ - 'Please make sure the types of head node and tail node are identical.' \ - ''.format(etype)) + raise DGLError( + 'remove_self_loop does not support unidirectional bipartite graphs: {}.' \ + 'Please make sure the types of head node and tail node are identical.' \ + ''.format(etype)) u, v = g.edges(form='uv', order='eid', etype=etype) self_loop_eids = F.tensor(F.nonzero_1d(u == v), dtype=F.dtype(u)) new_g = remove_edges(g, self_loop_eids, etype=etype) From 1358c56d370c516f57feccacfaf2b3221b844fea Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 20 Jul 2020 05:53:01 +0000 Subject: [PATCH 05/15] Add some test case --- tests/compute/test_heterograph.py | 15 +++++++++++++ tests/compute/test_transform.py | 36 +++++++++++++------------------ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/tests/compute/test_heterograph.py b/tests/compute/test_heterograph.py index 4ccd11f10f47..ac8c370fea15 100644 --- a/tests/compute/test_heterograph.py +++ b/tests/compute/test_heterograph.py @@ -2078,6 +2078,21 @@ def test_add_edges(idtype): assert F.array_equal(g.edata['h'], F.tensor([1, 1, 2, 2], dtype=idtype)) assert F.array_equal(g.edata['hh'], F.tensor([0, 0, 2, 2], dtype=idtype)) + # zero data graph + g = dgl.graph([], num_nodes=0, idtype=idtype, device=F.ctx()) + u = F.tensor([0, 1], dtype=idtype) + v = F.tensor([2, 2], dtype=idtype) + e_feat = {'h' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx()), + 'hh' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())} + g.add_edges(u, v, e_feat) + assert g.number_of_nodes() == 3 + assert g.number_of_edges() == 2 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0, 1], dtype=idtype)) + assert F.array_equal(v, F.tensor([2, 2], dtype=idtype)) + assert F.array_equal(g.edata['h'], F.tensor([2, 2], dtype=idtype)) + assert F.array_equal(g.edata['hh'], F.tensor([2, 2], dtype=idtype)) + # bipartite graph g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) u = 0 diff --git a/tests/compute/test_transform.py b/tests/compute/test_transform.py index d64d4407f828..77c7fcc81b05 100644 --- a/tests/compute/test_transform.py +++ b/tests/compute/test_transform.py @@ -346,25 +346,6 @@ def test_laplacian_lambda_max(): for l_max in l_max_arr: assert l_max < 2 + eps - -def test_add_self_loop(): - g = dgl.DGLGraph() - g.add_nodes(5) - g.add_edges([0, 1, 2], [1, 1, 2]) - # Nodes 0, 3, 4 don't have self-loop - new_g = dgl.transform.add_self_loop(g) - assert F.allclose(new_g.edges()[0], F.tensor([0, 0, 1, 2, 3, 4])) - assert F.allclose(new_g.edges()[1], F.tensor([1, 0, 1, 2, 3, 4])) - - -def test_remove_self_loop(): - g = dgl.DGLGraph() - g.add_nodes(5) - g.add_edges([0, 1, 2], [1, 1, 2]) - new_g = dgl.transform.remove_self_loop(g) - assert F.allclose(new_g.edges()[0], F.tensor([0])) - assert F.allclose(new_g.edges()[1], F.tensor([1])) - def create_large_graph_index(num_nodes): row = np.random.choice(num_nodes, num_nodes * 10) col = np.random.choice(num_nodes, num_nodes * 10) @@ -981,6 +962,21 @@ def test_add_edges(idtype): assert F.array_equal(g.edata['h'], F.tensor([1, 1, 2, 2], dtype=idtype)) assert F.array_equal(g.edata['hh'], F.tensor([0, 0, 2, 2], dtype=idtype)) + # zero data graph + g = dgl.graph([], num_nodes=0, idtype=idtype, device=F.ctx()) + u = F.tensor([0, 1], dtype=idtype) + v = F.tensor([2, 2], dtype=idtype) + e_feat = {'h' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx()), + 'hh' : F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())} + g = dgl.add_edges(g, u, v, e_feat) + assert g.number_of_nodes() == 3 + assert g.number_of_edges() == 2 + u, v = g.edges(form='uv', order='eid') + assert F.array_equal(u, F.tensor([0, 1], dtype=idtype)) + assert F.array_equal(v, F.tensor([2, 2], dtype=idtype)) + assert F.array_equal(g.edata['h'], F.tensor([2, 2], dtype=idtype)) + assert F.array_equal(g.edata['hh'], F.tensor([2, 2], dtype=idtype)) + # bipartite graph g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) u = 0 @@ -1391,8 +1387,6 @@ def test_remove_selfloop(idtype): # test_khop_adj() # test_khop_graph() # test_laplacian_lambda_max() - # test_remove_self_loop() - # test_add_self_loop() # test_partition_with_halo() # test_metis_partition() # test_hetero_linegraph('int32') From fdfdc865e93287842683d8a8a126c18e00a6dafc Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 20 Jul 2020 06:15:39 +0000 Subject: [PATCH 06/15] Fix --- python/dgl/heterograph.py | 12 ++++++++++-- python/dgl/transform.py | 4 +++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/python/dgl/heterograph.py b/python/dgl/heterograph.py index 4280df7cf030..e31cba39efd9 100644 --- a/python/dgl/heterograph.py +++ b/python/dgl/heterograph.py @@ -698,7 +698,9 @@ def remove_edges(self, eids, etype=None): if c_etype == (u_type, e_type, v_type): old_eids = self.edges(form='eid', order='eid', etype=c_etype) # trick here, eid_0 is 0 and should be handled - old_eids[0] += 1 + old_eids = F.scatter_row(old_eids, + F.tensor(0, dtype=F.int64), + F.tensor(1, dtype=F.dtype(old_eids))) old_eids = F.scatter_row(old_eids, eids, F.full_1d( len(eids), 0, F.dtype(old_eids), F.context(old_eids))) edges[c_etype] = F.tensor(F.nonzero_1d(old_eids), dtype=F.dtype(old_eids)) @@ -809,7 +811,7 @@ def add_self_loop(self, etype=None): Parameters ---------- etype : str or tuple of str, optional - The type of the edges to remove. Can be omitted if there is + The type of the edges to add self loops. Can be omitted if there is only one edge type in the graph. Notes @@ -877,6 +879,12 @@ def remove_self_loop(self, etype=None): If there are multiple self loops for a certain node, all of them will be removed. + Parameters + ---------- + etype : str or tuple of str, optional + The type of the edges to remove self loops. Can be omitted if there is + only one edge type in the graph. + Examples -------- diff --git a/python/dgl/transform.py b/python/dgl/transform.py index d56708d6c64f..076364ffbf23 100644 --- a/python/dgl/transform.py +++ b/python/dgl/transform.py @@ -1112,7 +1112,9 @@ def remove_edges(g, eids, etype=None): if c_etype == etype: old_eids = g.edges(form='eid', order='eid', etype=c_etype) # trick here, eid_0 is 0 and should be handled - old_eids[0] += 1 + old_eids = F.scatter_row(old_eids, + F.tensor(0, dtype=F.int64), + F.tensor(1, dtype=F.dtype(old_eids))) old_eids = F.scatter_row(old_eids, eids, F.full_1d( len(eids), 0, F.dtype(old_eids), F.context(old_eids))) edges[c_etype] = F.tensor(F.nonzero_1d(old_eids), dtype=F.dtype(old_eids)) From 49b882a8d7703eac389af9b1a9fbe2b4889ba5c2 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 20 Jul 2020 06:32:19 +0000 Subject: [PATCH 07/15] Fix --- python/dgl/heterograph.py | 8 +++++--- python/dgl/transform.py | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/python/dgl/heterograph.py b/python/dgl/heterograph.py index e31cba39efd9..2010d1cb3807 100644 --- a/python/dgl/heterograph.py +++ b/python/dgl/heterograph.py @@ -699,8 +699,8 @@ def remove_edges(self, eids, etype=None): old_eids = self.edges(form='eid', order='eid', etype=c_etype) # trick here, eid_0 is 0 and should be handled old_eids = F.scatter_row(old_eids, - F.tensor(0, dtype=F.int64), - F.tensor(1, dtype=F.dtype(old_eids))) + F.copy_to(F.tensor(0, dtype=F.int64), F.context(old_eids)), + F.copy_to(F.tensor(1, dtype=F.dtype(old_eids)),F.context(old_eids))) old_eids = F.scatter_row(old_eids, eids, F.full_1d( len(eids), 0, F.dtype(old_eids), F.context(old_eids))) edges[c_etype] = F.tensor(F.nonzero_1d(old_eids), dtype=F.dtype(old_eids)) @@ -792,7 +792,9 @@ def remove_nodes(self, nids, ntype=None): if self.get_ntype_id(c_ntype) == ntid: old_nids = self.nodes(c_ntype) # trick here, nid_0 is 0 and should be handled - old_nids[0] += 1 + old_nids = F.scatter_row(old_nids, + F.copy_to(F.tensor(0, dtype=F.int64), F.context(old_nids)), + F.copy_to(F.tensor(1, dtype=F.dtype(old_nids)),F.context(old_nids))) old_nids = F.scatter_row(old_nids, nids, F.full_1d( len(nids), 0, F.dtype(old_nids), F.context(old_nids))) nodes[c_ntype] = F.tensor(F.nonzero_1d(old_nids), dtype=F.dtype(old_nids)) diff --git a/python/dgl/transform.py b/python/dgl/transform.py index 076364ffbf23..18365ff1fbcd 100644 --- a/python/dgl/transform.py +++ b/python/dgl/transform.py @@ -1113,8 +1113,8 @@ def remove_edges(g, eids, etype=None): old_eids = g.edges(form='eid', order='eid', etype=c_etype) # trick here, eid_0 is 0 and should be handled old_eids = F.scatter_row(old_eids, - F.tensor(0, dtype=F.int64), - F.tensor(1, dtype=F.dtype(old_eids))) + F.copy_to(F.tensor(0, dtype=F.int64), F.context(old_eids)), + F.copy_to(F.tensor(1, dtype=F.dtype(old_eids)),F.context(old_eids))) old_eids = F.scatter_row(old_eids, eids, F.full_1d( len(eids), 0, F.dtype(old_eids), F.context(old_eids))) edges[c_etype] = F.tensor(F.nonzero_1d(old_eids), dtype=F.dtype(old_eids)) @@ -1213,7 +1213,9 @@ def remove_nodes(g, nids, ntype=None): if ntype is None or c_ntype == ntype: old_nids = g.nodes(c_ntype) # trick here, nid_0 is 0 and should be handled - old_nids[0] += 1 + old_nids = F.scatter_row(old_nids, + F.copy_to(F.tensor(0, dtype=F.int64), F.context(old_nids)), + F.copy_to(F.tensor(1, dtype=F.dtype(old_nids)),F.context(old_nids))) old_nids = F.scatter_row(old_nids, nids, F.full_1d( len(nids), 0, F.dtype(old_nids), F.context(old_nids))) nodes[c_ntype] = F.tensor(F.nonzero_1d(old_nids), dtype=F.dtype(old_nids)) From 53411e209ad8b2175be3b8354c8c2554d1ab29d3 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 20 Jul 2020 06:37:46 +0000 Subject: [PATCH 08/15] Fix --- python/dgl/heterograph.py | 12 ++++++++---- python/dgl/transform.py | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/python/dgl/heterograph.py b/python/dgl/heterograph.py index 2010d1cb3807..8884a9f99728 100644 --- a/python/dgl/heterograph.py +++ b/python/dgl/heterograph.py @@ -699,8 +699,10 @@ def remove_edges(self, eids, etype=None): old_eids = self.edges(form='eid', order='eid', etype=c_etype) # trick here, eid_0 is 0 and should be handled old_eids = F.scatter_row(old_eids, - F.copy_to(F.tensor(0, dtype=F.int64), F.context(old_eids)), - F.copy_to(F.tensor(1, dtype=F.dtype(old_eids)),F.context(old_eids))) + F.copy_to(F.tensor(0, dtype=F.int64), + F.context(old_eids)), + F.copy_to(F.tensor(1, dtype=F.dtype(old_eids)), + F.context(old_eids))) old_eids = F.scatter_row(old_eids, eids, F.full_1d( len(eids), 0, F.dtype(old_eids), F.context(old_eids))) edges[c_etype] = F.tensor(F.nonzero_1d(old_eids), dtype=F.dtype(old_eids)) @@ -793,8 +795,10 @@ def remove_nodes(self, nids, ntype=None): old_nids = self.nodes(c_ntype) # trick here, nid_0 is 0 and should be handled old_nids = F.scatter_row(old_nids, - F.copy_to(F.tensor(0, dtype=F.int64), F.context(old_nids)), - F.copy_to(F.tensor(1, dtype=F.dtype(old_nids)),F.context(old_nids))) + F.copy_to(F.tensor(0, dtype=F.int64), + F.context(old_nids)), + F.copy_to(F.tensor(1, dtype=F.dtype(old_nids)), + F.context(old_nids))) old_nids = F.scatter_row(old_nids, nids, F.full_1d( len(nids), 0, F.dtype(old_nids), F.context(old_nids))) nodes[c_ntype] = F.tensor(F.nonzero_1d(old_nids), dtype=F.dtype(old_nids)) diff --git a/python/dgl/transform.py b/python/dgl/transform.py index 18365ff1fbcd..b53c65dc8523 100644 --- a/python/dgl/transform.py +++ b/python/dgl/transform.py @@ -1113,8 +1113,10 @@ def remove_edges(g, eids, etype=None): old_eids = g.edges(form='eid', order='eid', etype=c_etype) # trick here, eid_0 is 0 and should be handled old_eids = F.scatter_row(old_eids, - F.copy_to(F.tensor(0, dtype=F.int64), F.context(old_eids)), - F.copy_to(F.tensor(1, dtype=F.dtype(old_eids)),F.context(old_eids))) + F.copy_to(F.tensor(0, dtype=F.int64), + F.context(old_eids)), + F.copy_to(F.tensor(1, dtype=F.dtype(old_eids)), + F.context(old_eids))) old_eids = F.scatter_row(old_eids, eids, F.full_1d( len(eids), 0, F.dtype(old_eids), F.context(old_eids))) edges[c_etype] = F.tensor(F.nonzero_1d(old_eids), dtype=F.dtype(old_eids)) @@ -1214,8 +1216,10 @@ def remove_nodes(g, nids, ntype=None): old_nids = g.nodes(c_ntype) # trick here, nid_0 is 0 and should be handled old_nids = F.scatter_row(old_nids, - F.copy_to(F.tensor(0, dtype=F.int64), F.context(old_nids)), - F.copy_to(F.tensor(1, dtype=F.dtype(old_nids)),F.context(old_nids))) + F.copy_to(F.tensor(0, dtype=F.int64), + F.context(old_nids)), + F.copy_to(F.tensor(1, dtype=F.dtype(old_nids)), + F.context(old_nids))) old_nids = F.scatter_row(old_nids, nids, F.full_1d( len(nids), 0, F.dtype(old_nids), F.context(old_nids))) nodes[c_ntype] = F.tensor(F.nonzero_1d(old_nids), dtype=F.dtype(old_nids)) From fb5cb86271a98cd2fde42aad9ee6fb4ec6744428 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 20 Jul 2020 06:55:22 +0000 Subject: [PATCH 09/15] Fix --- python/dgl/heterograph.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/dgl/heterograph.py b/python/dgl/heterograph.py index 8884a9f99728..ddfd8b109be5 100644 --- a/python/dgl/heterograph.py +++ b/python/dgl/heterograph.py @@ -700,9 +700,9 @@ def remove_edges(self, eids, etype=None): # trick here, eid_0 is 0 and should be handled old_eids = F.scatter_row(old_eids, F.copy_to(F.tensor(0, dtype=F.int64), - F.context(old_eids)), + F.context(old_eids)), F.copy_to(F.tensor(1, dtype=F.dtype(old_eids)), - F.context(old_eids))) + F.context(old_eids))) old_eids = F.scatter_row(old_eids, eids, F.full_1d( len(eids), 0, F.dtype(old_eids), F.context(old_eids))) edges[c_etype] = F.tensor(F.nonzero_1d(old_eids), dtype=F.dtype(old_eids)) @@ -796,9 +796,9 @@ def remove_nodes(self, nids, ntype=None): # trick here, nid_0 is 0 and should be handled old_nids = F.scatter_row(old_nids, F.copy_to(F.tensor(0, dtype=F.int64), - F.context(old_nids)), + F.context(old_nids)), F.copy_to(F.tensor(1, dtype=F.dtype(old_nids)), - F.context(old_nids))) + F.context(old_nids))) old_nids = F.scatter_row(old_nids, nids, F.full_1d( len(nids), 0, F.dtype(old_nids), F.context(old_nids))) nodes[c_ntype] = F.tensor(F.nonzero_1d(old_nids), dtype=F.dtype(old_nids)) From 5eeab847660992871fd09ba270571c54895af9e8 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 20 Jul 2020 16:04:46 +0000 Subject: [PATCH 10/15] Fix --- examples/pytorch/rgcn/entity_classify.py | 2 +- python/dgl/heterograph.py | 181 +++-------------- python/dgl/transform.py | 246 ++--------------------- python/dgl/utils.py | 25 +++ tests/compute/test_heterograph.py | 146 ++++++-------- 5 files changed, 131 insertions(+), 469 deletions(-) diff --git a/examples/pytorch/rgcn/entity_classify.py b/examples/pytorch/rgcn/entity_classify.py index c6010948db43..2aad68af1875 100644 --- a/examples/pytorch/rgcn/entity_classify.py +++ b/examples/pytorch/rgcn/entity_classify.py @@ -22,7 +22,7 @@ class EntityClassify(BaseRGCN): def create_features(self): - features = torch.arange(self.num_nodes) + features = torch.ones(self.num_nodes) if self.use_cuda: features = features.cuda() return features diff --git a/python/dgl/heterograph.py b/python/dgl/heterograph.py index ddfd8b109be5..fcd7205a8ff2 100644 --- a/python/dgl/heterograph.py +++ b/python/dgl/heterograph.py @@ -696,16 +696,8 @@ def remove_edges(self, eids, etype=None): for c_etype in self.canonical_etypes: # the target edge type if c_etype == (u_type, e_type, v_type): - old_eids = self.edges(form='eid', order='eid', etype=c_etype) - # trick here, eid_0 is 0 and should be handled - old_eids = F.scatter_row(old_eids, - F.copy_to(F.tensor(0, dtype=F.int64), - F.context(old_eids)), - F.copy_to(F.tensor(1, dtype=F.dtype(old_eids)), - F.context(old_eids))) - old_eids = F.scatter_row(old_eids, eids, F.full_1d( - len(eids), 0, F.dtype(old_eids), F.context(old_eids))) - edges[c_etype] = F.tensor(F.nonzero_1d(old_eids), dtype=F.dtype(old_eids)) + origin_eids = self.edges(form='eid', order='eid', etype=c_etype) + edges[c_etype] = utils.compensate(eids, origin_eids) else: edges[c_etype] = self.edges(form='eid', order='eid', etype=c_etype) @@ -792,16 +784,8 @@ def remove_nodes(self, nids, ntype=None): nodes = {} for c_ntype in self.ntypes: if self.get_ntype_id(c_ntype) == ntid: - old_nids = self.nodes(c_ntype) - # trick here, nid_0 is 0 and should be handled - old_nids = F.scatter_row(old_nids, - F.copy_to(F.tensor(0, dtype=F.int64), - F.context(old_nids)), - F.copy_to(F.tensor(1, dtype=F.dtype(old_nids)), - F.context(old_nids))) - old_nids = F.scatter_row(old_nids, nids, F.full_1d( - len(nids), 0, F.dtype(old_nids), F.context(old_nids))) - nodes[c_ntype] = F.tensor(F.nonzero_1d(old_nids), dtype=F.dtype(old_nids)) + original_nids = self.nodes(c_ntype) + nodes[c_ntype] = utils.compensate(nids, original_nids) else: nodes[c_ntype] = self.nodes(c_ntype) @@ -811,137 +795,6 @@ def remove_nodes(self, nids, ntype=None): self._node_frames = sub_g._node_frames self._edge_frames = sub_g._edge_frames - def add_self_loop(self, etype=None): - r""" Add self loop for each node in the graph. - - Parameters - ---------- - etype : str or tuple of str, optional - The type of the edges to add self loops. Can be omitted if there is - only one edge type in the graph. - - Notes - ----- - * It is recommanded to ``remove_self_loop`` before invoking - ``add_self_loop``. - * Inplace update is applied to the current graph. - * Features for the new edges (self-loop edges) will be created - by initializers defined with :func:`set_n_initializer` - (default initializer fills zeros). - - Examples - -------- - - >>> import dgl - >>> import torch - - **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type** - - >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0]))) - >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1) - >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1) - >>> g - >>> g - Graph(num_nodes=3, num_edges=6, - ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)} - edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)}) - >>> g.edata['he'] - tensor([[0.], - [1.], - [2.], - [0.], - [0.], - [0.]]) - - **Heterogeneous Graphs with Multiple Node Types** - - >>> g = dgl.heterograph({ - ('user', 'follows', 'user'): (torch.tensor([1, 2]), - torch.tensor([0, 1])), - ('user', 'plays', 'game'): (torch.tensor([0, 1]), - torch.tensor([0, 1]))}) - >>> g.add_self_loop(etype='follows') - >>> g - Graph(num_nodes={'user': 3, 'game': 2}, - num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 5}, - metagraph=[('user', 'user'), ('user', 'game')]) - - See Also - -------- - remove_self_loop - """ - etype = self.to_canonical_etype(etype) - if etype[0] != etype[2]: - raise DGLError( - 'add_self_loop does not support unidirectional bipartite graphs: {}.' \ - 'Please make sure the types of head node and tail node are identical.' \ - ''.format(etype)) - nodes = self.nodes(etype[0]) - self.add_edges(nodes, nodes, etype=etype) - - def remove_self_loop(self, etype=None): - r""" Remove self loops for each node in the graph. - - If there are multiple self loops for a certain node, - all of them will be removed. - - Parameters - ---------- - etype : str or tuple of str, optional - The type of the edges to remove self loops. Can be omitted if there is - only one edge type in the graph. - - Examples - -------- - - >>> import dgl - >>> import torch - - **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type** - - >>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2])), - idtype=idtype, device=F.ctx()) - >>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1) - >>> g.remove_self_loop() - >>> g - Graph(num_nodes=3, num_edges=2, - edata_schemes={'he': Scheme(shape=(2,), dtype=torch.float32)}) - >>> g.edata['he'] - tensor([[0.],[3.]]) - - **Heterogeneous Graphs with Multiple Node Types** - - >>> g = dgl.heterograph({ - >>> ('user', 'follows', 'user'): (torch.tensor([0, 1, 1, 1, 2]), - >>> torch.tensor([0, 0, 1, 1, 1])), - >>> ('user', 'plays', 'game'): (torch.tensor([0, 1]), - >>> torch.tensor([0, 1])) - >>> }) - >>> g.remove_self_loop(etype='follows') - >>> g.num_nodes('user') - 3 - >>> g.num_nodes('game') - 2 - >>> g.num_edges('follows') - 2 - >>> g.num_edges('plays') - 2 - - See Also - -------- - add_self_loop - """ - # TODO(xiangsx) need to handle block - etype = self.to_canonical_etype(etype) - if etype[0] != etype[2]: - raise DGLError( - 'remove_self_loop does not support unidirectional bipartite graphs: {}.' \ - 'Please make sure the types of head node and tail node are identical.' \ - ''.format(etype)) - u, v = self.edges(form='uv', order='eid', etype=etype) - self_loop_eids = F.tensor(F.nonzero_1d(u == v), dtype=F.dtype(u)) - self.remove_edges(self_loop_eids, etype=etype) - ################################################################# # Metagraph query ################################################################# @@ -4494,6 +4347,32 @@ def cpu(self): """ return self.to(F.cpu()) + def clone(self): + """Return a heterograph object that is a clone of current graph. + + Returns + ------- + DGLHeteroGraph + The graph object that is a clone of current graph. + """ + meta_edges = [] + for s_ntype, e_type, d_ntype in self.canonical_etypes: + meta_edges.append((self.get_ntype_id(s_ntype), self.get_ntype_id(d_ntype))) + + metagraph = graph_index.from_edge_list(meta_edges, True) + # rebuild graph idx + num_nodes_per_type = [self.number_of_nodes(c_ntype) for c_ntype in self.ntypes] + relation_graphs = [self._graph.get_relation_graph(self.get_etype_id(c_etype)) + for c_etype in self.canonical_etypes] + hgidx = heterograph_index.create_heterograph_from_relations( + metagraph, relation_graphs, utils.toindex(num_nodes_per_type, "int64")) + + local_node_frames = [fr.clone() for fr in self._node_frames] + local_edge_frames = [fr.clone() for fr in self._edge_frames] + return DGLHeteroGraph(hgidx, self.ntypes, self.etypes, + local_node_frames, local_edge_frames) + + def local_var(self): """Return a heterograph object that can be used in a local function scope. diff --git a/python/dgl/transform.py b/python/dgl/transform.py index b53c65dc8523..5ef8b2ba850e 100644 --- a/python/dgl/transform.py +++ b/python/dgl/transform.py @@ -763,74 +763,9 @@ def add_nodes(g, num, data=None, ntype=None): add_edges remove_edges """ - # TODO(xiangsx): block do not support add_nodes - if ntype is None: - if len(g.ntypes) != 1: - raise DGLError('Node type name must be specified if there are more than one ' - 'node types.') - - assert num > 0, 'Number of new nodes should be larger than one.' - - num_nodes_dict = {} - if ntype is None: - num_nodes_dict[g.ntypes[0]] = g.number_of_nodes(g.ntypes[0]) + num - else: - for c_ntype in g.ntypes: - num_nodes_dict[c_ntype] = \ - g.number_of_nodes(c_ntype) + (num if c_ntype == ntype else 0) - graph_data = {} - for c_etype in g.canonical_etypes: - u, v = g.edges(form='uv', order='eid', etype=c_etype) - graph_data[c_etype] = (u, v) - - new_g = heterograph(graph_data, - num_nodes_dict, - idtype=g.idtype, - device=g.device) - - for c_ntype in g.ntypes: - # ntype is None: only one ntype - # c_ntype == ntype: ntype to add nodes - if ntype is None or c_ntype == ntype: - ndata = g.nodes[c_ntype].data - # existing features - for key, val in ndata.items(): - if data is not None and data.get(key, None) is not None: - new_feats = utils.prepare_tensor(new_g, data[key], 'data') - assert len(new_feats) == num, \ - 'Data length of {} should be {}, but got {}'.format(key, - num, - len(new_feats)) - new_g.nodes[c_ntype].data[key] = F.cat([val, new_feats], dim=0) - else: - shape = F.shape(val) - shape = (num,) + shape[1:] - new_feats = F.zeros(shape, F.dtype(val), F.context(val)) - new_g.nodes[c_ntype].data[key] = F.cat([val, new_feats], dim=0) - - # non-existing features - if data is not None: - for key, val in data.items(): - if ndata.get(key, None) is None: - shape = F.shape(data[key]) - shape = (g.number_of_nodes(c_ntype),) + shape[1:] - new_feats = utils.prepare_tensor(new_g, data[key], 'data') - assert len(new_feats) == num, \ - 'Data length of {} should be {}, but got {}'.format(key, - num, - len(new_feats)) - new_g.nodes[c_ntype].data[key] = \ - F.cat([F.zeros(shape, F.dtype(data[key]), F.context(data[key])), - new_feats], dim=0) - else: - for key, val in g.nodes[c_ntype].data.items(): - new_g.nodes[c_ntype].data[key] = val - - for c_etype in g.canonical_etypes: - for key, val in g.edges[c_etype].data.items(): - new_g.edges[c_etype].data[key] = val - - return new_g + g = g.clone() + g.add_nodes(num, data=data, ntype=ntype) + return g def add_edges(g, u, v, data=None, etype=None): r"""Add multiple new edges for the specified edge type. @@ -931,107 +866,9 @@ def add_edges(g, u, v, data=None, etype=None): remove_nodes remove_edges """ - # TODO(xiangsx): block do not support add_edges - u = utils.prepare_tensor(g, u, 'u') - v = utils.prepare_tensor(g, v, 'v') - - if etype is None: - if len(g.etypes) != 1: - raise DGLError('Edge type name must be specified if there are more than one ' - 'edge types.') - - assert len(u) > 0 and len(v) > 0, \ - 'The number of source nodes and the number of destination nodes should be larger than 0' - - assert len(u) == len(v) or len(u) == 1 or len(v) == 1, \ - 'The number of source nodes and the number of destination nodes should be same, ' \ - 'or either the number of source nodes or the number of destination nodes is 1.' - - # fill up u and v - if len(u) == 1 and len(v) > 1: - u = F.full_1d(len(v), F.as_scalar(u), dtype=F.dtype(u), ctx=F.context(u)) - if len(v) == 1 and len(u) > 1: - v = F.full_1d(len(u), F.as_scalar(v), dtype=F.dtype(v), ctx=F.context(v)) - - u_type, e_type, v_type = g.to_canonical_etype(etype) - # if end nodes of adding edges does not exists - # use add_nodes to add new nodes first. - num_of_u = g.number_of_nodes(u_type) - num_of_v = g.number_of_nodes(v_type) - u_max = F.as_scalar(F.max(u, dim=0)) + 1 - v_max = F.as_scalar(F.max(v, dim=0)) + 1 - - if u_type == v_type: - num_nodes = max(u_max, v_max) - if num_nodes > num_of_u: - g = add_nodes(g, num_nodes - num_of_u, ntype=u_type) - else: - if u_max > num_of_u: - g = add_nodes(g, u_max - num_of_u, ntype=u_type) - if v_max > num_of_v: - g = add_nodes(g, v_max - num_of_v, ntype=v_type) - - num_nodes_dict = {} - for c_ntype in g.ntypes: - num_nodes_dict[c_ntype] = g.number_of_nodes(c_ntype) - - graph_data = {} - for c_etype in g.canonical_etypes: - old_u, old_v = g.edges(form='uv', order='eid', etype=c_etype) - if c_etype == (u_type, e_type, v_type): - graph_data[c_etype] = (F.cat([old_u, u], dim=0), - F.cat([old_v, v], dim=0)) - else: - graph_data[c_etype] = (old_u, old_v) - - new_g = heterograph(graph_data, - num_nodes_dict, - idtype=g.idtype, - device=g.device) - - # copy node features - for c_ntype in g.ntypes: - for key, val in g.nodes[c_ntype].data.items(): - new_g.nodes[c_ntype].data[key] = val - - # copy edge features - for c_etype in g.canonical_etypes: - if c_etype == (u_type, e_type, v_type): - edata = g.edges[c_etype].data - # existing features - for key, val in edata.items(): - if data is not None and data.get(key, None) is not None: - new_feats = utils.prepare_tensor(new_g, data[key], 'data') - assert len(new_feats) == len(u), \ - 'Data length of {} should be {}, but got {}'.format(key, - len(u), - len(new_feats)) - new_g.edges[c_etype].data[key] = F.cat([val, new_feats], dim=0) - else: - shape = F.shape(val) - shape = (len(u),) + shape[1:] - new_feats = F.zeros(shape, F.dtype(val), F.context(val)) - new_g.edges[c_etype].data[key] = F.cat([val, new_feats], dim=0) - - # non-existing features - if data is not None: - for key, val in data.items(): - if edata.get(key, None) is None: - shape = F.shape(data[key]) - shape = (g.number_of_edges(c_etype),) + shape[1:] - new_feats = utils.prepare_tensor(new_g, data[key], 'data') - assert len(new_feats) == len(u), \ - 'Data length of {} should be {}, but got {}'.format(key, - len(u), - len(new_feats)) - new_g.edges[c_etype].data[key] = \ - F.cat([F.zeros(shape, F.dtype(val), F.context(val)), - new_feats], dim=0) - else: - for key, val in g.edges[c_etype].data.items(): - new_g.edges[c_etype].data[key] = val - - return new_g + g = g.clone() + g.add_edges(u, v, data=data, etype=etype) + return g def remove_edges(g, eids, etype=None): r"""Remove multiple edges with the specified edge type. @@ -1094,37 +931,9 @@ def remove_edges(g, eids, etype=None): add_edges remove_nodes """ - # TODO(xiangsx): block do not support remove_edges - if etype is None: - if len(g.etypes) != 1: - raise DGLError('Edge type name must be specified if there are more than one ' \ - 'edge types.') - eids = utils.prepare_tensor(g, eids, 'u') - assert g.number_of_edges(etype) > F.as_scalar(F.max(eids, dim=0)), \ - 'The input eid {} is out of the range [0:{})'.format( - F.as_scalar(F.max(eids, dim=0)), g.number_of_edges(etype)) - - # edge_subgraph - edges = {} - etype = g.to_canonical_etype(etype) - for c_etype in g.canonical_etypes: - # the target edge type - if c_etype == etype: - old_eids = g.edges(form='eid', order='eid', etype=c_etype) - # trick here, eid_0 is 0 and should be handled - old_eids = F.scatter_row(old_eids, - F.copy_to(F.tensor(0, dtype=F.int64), - F.context(old_eids)), - F.copy_to(F.tensor(1, dtype=F.dtype(old_eids)), - F.context(old_eids))) - old_eids = F.scatter_row(old_eids, eids, F.full_1d( - len(eids), 0, F.dtype(old_eids), F.context(old_eids))) - edges[c_etype] = F.tensor(F.nonzero_1d(old_eids), dtype=F.dtype(old_eids)) - else: - edges[c_etype] = g.edges(form='eid', order='eid', etype=c_etype) - - sub_g = g.edge_subgraph(edges, preserve_nodes=True) - return sub_g + g = g.clone() + g.remove_edges(eids, etype=etype) + return g def remove_nodes(g, nids, ntype=None): @@ -1197,38 +1006,9 @@ def remove_nodes(g, nids, ntype=None): add_edges remove_edges """ - # TODO(xiangsx): block do not support remove_nodes - if ntype is None: - if len(g.ntypes) != 1: - raise DGLError('Node type name must be specified if there are more than one ' \ - 'node types.') - - nids = utils.prepare_tensor(g, nids, 'u') - assert g.number_of_nodes(ntype) > F.as_scalar(F.max(nids, dim=0)), \ - 'The input nids {} is out of the range [0:{})'.format( - F.as_scalar(F.max(nids, dim=0)), g.number_of_nodes(ntype)) - - nodes = {} - for c_ntype in g.ntypes: - # ntype is None: only one ntype - # c_ntype == ntype: ntype to add nodes - if ntype is None or c_ntype == ntype: - old_nids = g.nodes(c_ntype) - # trick here, nid_0 is 0 and should be handled - old_nids = F.scatter_row(old_nids, - F.copy_to(F.tensor(0, dtype=F.int64), - F.context(old_nids)), - F.copy_to(F.tensor(1, dtype=F.dtype(old_nids)), - F.context(old_nids))) - old_nids = F.scatter_row(old_nids, nids, F.full_1d( - len(nids), 0, F.dtype(old_nids), F.context(old_nids))) - nodes[c_ntype] = F.tensor(F.nonzero_1d(old_nids), dtype=F.dtype(old_nids)) - else: - nodes[c_ntype] = g.nodes(c_ntype) - - # node_subgraph - sub_g = g.subgraph(nodes) - return sub_g + g = g.clone() + g.remove_nodes(nids, ntype=ntype) + return g def add_self_loop(g, etype=None): r""" Add self loop for each node in the graph. @@ -1297,6 +1077,7 @@ def add_self_loop(g, etype=None): new_g = add_edges(g, nodes, nodes, etype=etype) return new_g +DGLHeteroGraph.add_self_loop = add_self_loop def remove_self_loop(g, etype=None): r""" Remove self loops for each node in the graph. @@ -1362,6 +1143,7 @@ def remove_self_loop(g, etype=None): new_g = remove_edges(g, self_loop_eids, etype=etype) return new_g +DGLHeteroGraph.remove_self_loop = remove_self_loop def reorder_nodes(g, new_node_ids): """ Generate a new graph with new node Ids. diff --git a/python/dgl/utils.py b/python/dgl/utils.py index 6e0fa55fbf4e..19f019e756ee 100644 --- a/python/dgl/utils.py +++ b/python/dgl/utils.py @@ -739,3 +739,28 @@ def check_all_same_device(glist, name): if g.device != device: raise DGLError('Expect {}[{}] to be on device {}, but got {}.'.format( name, i, device, g.device)) + +def compensate(ids, origin_ids): + """computing the compensate set of ids from origin_ids + + Note: ids should be a subset of origin_ids. + Any of ids and origin_ids can be non-consecutive, + and origin_ids should be sorted. + + Example: + >>> ids = th.Tensor([0, 2, 4]) + >>> origin_ids = th.Tensor([0, 1, 2, 4, 5]) + >>> compensate(ids, origin_ids) + th.Tensor([1, 5]) + """ + # trick here, eid_0 or nid_0 can be 0. + mask = F.scatter_row(origin_ids, + F.copy_to(F.tensor(0, dtype=F.int64), + F.context(origin_ids)), + F.copy_to(F.tensor(1, dtype=F.dtype(origin_ids)), + F.context(origin_ids))) + mask = F.scatter_row(mask, + ids, + F.full_1d(len(ids), 0, F.dtype(ids), F.context(ids))) + return F.tensor(F.nonzero_1d(mask), dtype=F.dtype(ids)) + diff --git a/tests/compute/test_heterograph.py b/tests/compute/test_heterograph.py index ac8c370fea15..be288587602f 100644 --- a/tests/compute/test_heterograph.py +++ b/tests/compute/test_heterograph.py @@ -2023,6 +2023,66 @@ def test_reverse(idtype): assert F.array_equal(g_s, rg_d) assert F.array_equal(g_d, rg_s) +@parametrize_dtype +def test_clone(idtype): + g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx()) + g.ndata['h'] = F.copy_to(F.tensor([1, 1, 1], dtype=idtype), ctx=F.ctx()) + g.edata['h'] = F.copy_to(F.tensor([1, 1], dtype=idtype), ctx=F.ctx()) + + new_g = g.clone() + assert g.number_of_nodes() == new_g.number_of_nodes() + assert g.number_of_edges() == new_g.number_of_edges() + assert g.device == new_g.device + assert g.idtype == new_g.idtype + assert F.array_equal(g.ndata['h'], new_g.ndata['h']) + assert F.array_equal(g.edata['h'], new_g.edata['h']) + # data change + new_g.ndata['h'] = F.copy_to(F.tensor([2, 2, 2], dtype=idtype), ctx=F.ctx()) + assert F.array_equal(g.ndata['h'], new_g.ndata['h']) is False + g.edata['h'] = F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx()) + assert F.array_equal(g.edata['h'], new_g.edata['h']) is False + # graph structure change + g.add_nodes(1) + assert g.number_of_nodes() != new_g.number_of_nodes() + new_g.add_edges(1, 1) + assert g.number_of_edges() != new_g.number_of_edges() + + # zero data graph + g = dgl.graph([], num_nodes=0, idtype=idtype, device=F.ctx()) + new_g = g.clone() + assert g.number_of_nodes() == new_g.number_of_nodes() + assert g.number_of_edges() == new_g.number_of_edges() + + # heterograph + g = create_test_heterograph4(idtype) + g.edges['plays'].data['h'] = F.copy_to(F.tensor([1, 2, 3, 4], dtype=idtype), ctx=F.ctx()) + new_g = g.clone() + assert g.number_of_nodes('user') == new_g.number_of_nodes('user') + assert g.number_of_nodes('game') == new_g.number_of_nodes('game') + assert g.number_of_nodes('developer') == new_g.number_of_nodes('developer') + assert g.number_of_edges('plays') == new_g.number_of_edges('plays') + assert g.number_of_edges('develops') == new_g.number_of_edges('develops') + assert F.array_equal(g.nodes['user'].data['h'], new_g.nodes['user'].data['h']) + assert F.array_equal(g.nodes['game'].data['h'], new_g.nodes['game'].data['h']) + assert F.array_equal(g.edges['plays'].data['h'], new_g.edges['plays'].data['h']) + assert g.device == new_g.device + assert g.idtype == new_g.idtype + u, v = g.edges(form='uv', order='eid', etype='plays') + nu, nv = new_g.edges(form='uv', order='eid', etype='plays') + assert F.array_equal(u, nu) + assert F.array_equal(v, nv) + # graph structure change + u = F.tensor([0, 4], dtype=idtype) + v = F.tensor([2, 6], dtype=idtype) + g.add_edges(u, v, etype='plays') + u, v = g.edges(form='uv', order='eid', etype='plays') + assert F.array_equal(u, nu) is False + assert F.array_equal(v, nv) is False + assert F.array_equal(g.nodes['user'].data['h'], new_g.nodes['user'].data['h']) is False + assert F.array_equal(g.nodes['game'].data['h'], new_g.nodes['game'].data['h']) is False + assert F.array_equal(g.edges['plays'].data['h'], new_g.edges['plays'].data['h']) is False + + @parametrize_dtype def test_add_edges(idtype): # homogeneous graph @@ -2408,89 +2468,6 @@ def test_remove_nodes(idtype): assert F.array_equal(u, F.tensor([1], dtype=idtype)) assert F.array_equal(v, F.tensor([0], dtype=idtype)) - -@parametrize_dtype -def test_add_selfloop(idtype): - # homogeneous graph - g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx()) - g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) - g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx()) - g.add_self_loop() - assert g.number_of_nodes() == 3 - assert g.number_of_edges() == 6 - u, v = g.edges(form='uv', order='eid') - assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype)) - assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype)) - assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 0, 0, 0], dtype=idtype)) - - # bipartite graph - g = dgl.bipartite(([0, 1, 2], [1, 2, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) - # nothing will happend - raise_error = False - try: - g.add_self_loop() - except: - raise_error = True - assert raise_error - - g = create_test_heterograph6(idtype) - g.add_self_loop(etype='follows') - assert g.number_of_nodes('user') == 3 - assert g.number_of_nodes('game') == 2 - assert g.number_of_edges('follows') == 5 - assert g.number_of_edges('plays') == 2 - u, v = g.edges(form='uv', order='eid', etype='follows') - assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype)) - assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype)) - assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 0, 0, 0], dtype=idtype)) - assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype)) - - raise_error = False - try: - g.add_self_loop(etype='plays') - except: - raise_error = True - assert raise_error - -@parametrize_dtype -def test_remove_selfloop(idtype): - # homogeneous graph - g = dgl.graph(([0, 0, 0, 1], [1, 0, 0, 2]), idtype=idtype, device=F.ctx()) - g.edata['he'] = F.copy_to(F.tensor([1, 2, 3, 4], dtype=idtype), ctx=F.ctx()) - g.remove_self_loop() - assert g.number_of_nodes() == 3 - assert g.number_of_edges() == 2 - assert F.array_equal(g.edata['he'], F.tensor([1, 4], dtype=idtype)) - - # bipartite graph - g = dgl.bipartite(([0, 1, 2], [1, 2, 2]), 'user', 'plays', 'game', idtype=idtype, device=F.ctx()) - # nothing will happend - raise_error = False - try: - g.remove_self_loop(etype='plays') - except: - raise_error = True - assert raise_error - - g = create_test_heterograph5(idtype) - g.remove_self_loop(etype='follows') - assert g.number_of_nodes('user') == 3 - assert g.number_of_nodes('game') == 2 - assert g.number_of_edges('follows') == 2 - assert g.number_of_edges('plays') == 2 - u, v = g.edges(form='uv', order='eid', etype='follows') - assert F.array_equal(u, F.tensor([1, 2], dtype=idtype)) - assert F.array_equal(v, F.tensor([0, 1], dtype=idtype)) - assert F.array_equal(g.edges['follows'].data['h'], F.tensor([2, 4], dtype=idtype)) - assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype)) - - raise_error = False - try: - g.remove_self_loop(etype='plays') - except: - raise_error = True - assert raise_error - if __name__ == '__main__': # test_create() # test_query() @@ -2523,6 +2500,5 @@ def test_remove_selfloop(idtype): test_add_nodes(F.int32) test_remove_edges(F.int32) test_remove_nodes(F.int32) - test_add_selfloop(F.int32) - test_remove_selfloop(F.int32) + test_clone(F.int32) pass From fda0e0405ac3cc0e87a7950cdc327236400fc87c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 21 Jul 2020 01:15:15 +0000 Subject: [PATCH 11/15] Fix --- examples/pytorch/rgcn/entity_classify.py | 2 +- python/dgl/heterograph.py | 2 +- python/dgl/transform.py | 2 +- python/dgl/utils.py | 1 - 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/pytorch/rgcn/entity_classify.py b/examples/pytorch/rgcn/entity_classify.py index 2aad68af1875..c6010948db43 100644 --- a/examples/pytorch/rgcn/entity_classify.py +++ b/examples/pytorch/rgcn/entity_classify.py @@ -22,7 +22,7 @@ class EntityClassify(BaseRGCN): def create_features(self): - features = torch.ones(self.num_nodes) + features = torch.arange(self.num_nodes) if self.use_cuda: features = features.cuda() return features diff --git a/python/dgl/heterograph.py b/python/dgl/heterograph.py index fcd7205a8ff2..8bd52a8dfda7 100644 --- a/python/dgl/heterograph.py +++ b/python/dgl/heterograph.py @@ -4356,7 +4356,7 @@ def clone(self): The graph object that is a clone of current graph. """ meta_edges = [] - for s_ntype, e_type, d_ntype in self.canonical_etypes: + for s_ntype, _, d_ntype in self.canonical_etypes: meta_edges.append((self.get_ntype_id(s_ntype), self.get_ntype_id(d_ntype))) metagraph = graph_index.from_edge_list(meta_edges, True) diff --git a/python/dgl/transform.py b/python/dgl/transform.py index 5ef8b2ba850e..732e24b60e2b 100644 --- a/python/dgl/transform.py +++ b/python/dgl/transform.py @@ -12,7 +12,7 @@ from .graph_index import from_coo from .graph_index import _get_halo_subgraph_inner_node from .graph import unbatch -from .convert import graph, bipartite, heterograph +from .convert import graph, bipartite from . import utils from .base import EID, NID from . import ndarray as nd diff --git a/python/dgl/utils.py b/python/dgl/utils.py index 19f019e756ee..0348eeea27fb 100644 --- a/python/dgl/utils.py +++ b/python/dgl/utils.py @@ -763,4 +763,3 @@ def compensate(ids, origin_ids): ids, F.full_1d(len(ids), 0, F.dtype(ids), F.context(ids))) return F.tensor(F.nonzero_1d(mask), dtype=F.dtype(ids)) - From b5dee42cdb5b9704d6d1c92d70e8c2dc950cbecb Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 21 Jul 2020 01:54:24 +0000 Subject: [PATCH 12/15] fix --- tests/compute/test_heterograph.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/compute/test_heterograph.py b/tests/compute/test_heterograph.py index be288587602f..9f1d05ee974f 100644 --- a/tests/compute/test_heterograph.py +++ b/tests/compute/test_heterograph.py @@ -2038,9 +2038,9 @@ def test_clone(idtype): assert F.array_equal(g.edata['h'], new_g.edata['h']) # data change new_g.ndata['h'] = F.copy_to(F.tensor([2, 2, 2], dtype=idtype), ctx=F.ctx()) - assert F.array_equal(g.ndata['h'], new_g.ndata['h']) is False + assert (F.array_equal(g.ndata['h'], new_g.ndata['h']) == False) g.edata['h'] = F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx()) - assert F.array_equal(g.edata['h'], new_g.edata['h']) is False + assert (F.array_equal(g.edata['h'], new_g.edata['h']) == False) # graph structure change g.add_nodes(1) assert g.number_of_nodes() != new_g.number_of_nodes() @@ -2076,11 +2076,11 @@ def test_clone(idtype): v = F.tensor([2, 6], dtype=idtype) g.add_edges(u, v, etype='plays') u, v = g.edges(form='uv', order='eid', etype='plays') - assert F.array_equal(u, nu) is False - assert F.array_equal(v, nv) is False - assert F.array_equal(g.nodes['user'].data['h'], new_g.nodes['user'].data['h']) is False - assert F.array_equal(g.nodes['game'].data['h'], new_g.nodes['game'].data['h']) is False - assert F.array_equal(g.edges['plays'].data['h'], new_g.edges['plays'].data['h']) is False + assert (F.array_equal(u, nu) == False) + assert (F.array_equal(v, nv) == False) + assert (F.array_equal(g.nodes['user'].data['h'], new_g.nodes['user'].data['h']) == False) + assert (F.array_equal(g.nodes['game'].data['h'], new_g.nodes['game'].data['h']) == False) + assert (F.array_equal(g.edges['plays'].data['h'], new_g.edges['plays'].data['h']) == False) @parametrize_dtype From 1f6b7630984ddccb06a31376cb46f86a97d6b414 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 21 Jul 2020 02:16:46 +0000 Subject: [PATCH 13/15] triger From 0c9153b36bbd38f969bd84d85ad7311b6bcfbd50 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 21 Jul 2020 02:34:37 +0000 Subject: [PATCH 14/15] Fix --- tests/compute/test_heterograph.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/compute/test_heterograph.py b/tests/compute/test_heterograph.py index 9f1d05ee974f..578dab13d02c 100644 --- a/tests/compute/test_heterograph.py +++ b/tests/compute/test_heterograph.py @@ -2078,9 +2078,9 @@ def test_clone(idtype): u, v = g.edges(form='uv', order='eid', etype='plays') assert (F.array_equal(u, nu) == False) assert (F.array_equal(v, nv) == False) - assert (F.array_equal(g.nodes['user'].data['h'], new_g.nodes['user'].data['h']) == False) - assert (F.array_equal(g.nodes['game'].data['h'], new_g.nodes['game'].data['h']) == False) - assert (F.array_equal(g.edges['plays'].data['h'], new_g.edges['plays'].data['h']) == False) + assert g.nodes['user'].data['h'].shape[0] != new_g.nodes['user'].data['h'].shape[0] + assert g.nodes['game'].data['h'].shape[0] != new_g.nodes['game'].data['h'].shape[0] + assert g.edges['plays'].data['h'].shape[0] != new_g.edges['plays'].data['h'].shape[0] @parametrize_dtype From a27dc167f0756df70cbc6201cd4799187a28010e Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 21 Jul 2020 02:52:35 +0000 Subject: [PATCH 15/15] fix --- tests/compute/test_heterograph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/compute/test_heterograph.py b/tests/compute/test_heterograph.py index 578dab13d02c..86d88bd52d47 100644 --- a/tests/compute/test_heterograph.py +++ b/tests/compute/test_heterograph.py @@ -2076,8 +2076,8 @@ def test_clone(idtype): v = F.tensor([2, 6], dtype=idtype) g.add_edges(u, v, etype='plays') u, v = g.edges(form='uv', order='eid', etype='plays') - assert (F.array_equal(u, nu) == False) - assert (F.array_equal(v, nv) == False) + assert u.shape[0] != nu.shape[0] + assert v.shape[0] != nv.shape[0] assert g.nodes['user'].data['h'].shape[0] != new_g.nodes['user'].data['h'].shape[0] assert g.nodes['game'].data['h'].shape[0] != new_g.nodes['game'].data['h'].shape[0] assert g.edges['plays'].data['h'].shape[0] != new_g.edges['plays'].data['h'].shape[0]