-
Notifications
You must be signed in to change notification settings - Fork 0
/
kMatrix.py
114 lines (103 loc) · 3.59 KB
/
kMatrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Generate K matrix from the following datasets
# - COLLAB
# - IMDB
# - PROTEINS
# - MUTAG
# The K matrix is a binary node-by-node matrix: entry (i, j) is 1 when the shortest-path length between node i and node j is between 1 and k hops, and 0 otherwise.
import pandas as pd
import numpy as np
import torch_geometric
from get_dataset import Dataset
import networkx as nx
import argparse
def read_args(argv=None):
    """Parse the command-line options used to generate a K matrix.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which
            is the original behaviour. Passing a list allows the parser
            to be driven programmatically.

    Returns:
        argparse.Namespace with the attributes:
            name (str): dataset name, default ``"MUTAG"``.
            khops (int): hop limit for the K matrix, default ``2``.
            graph_index (int): index of the graph to use, default ``0``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--name", type=str, default="MUTAG",
                        help="the dataset name")
    parser.add_argument("--khops", type=int, default=2,
                        help="K hop value for generating k matrix")
    parser.add_argument("--graph_index", type=int, default=0,
                        help="sub graph index")
    return parser.parse_args(argv)
def tensor_to_list(tensor):
    """Convert a tensor into a Python list by way of NumPy.

    The tensor is first turned into a NumPy array with ``.numpy()`` and
    the array is then iterated into a list, so the list elements are
    whatever iterating that array yields (NumPy scalars for a 1-D input).
    """
    return list(tensor.numpy())
def get_src_target_nodes(graph):
    """Split a graph's ``edge_index`` into source and target node lists.

    Params:
        graph: object indexable by ``"edge_index"``; row 0 of that entry
            is read as the source nodes and row 1 as the target nodes.
    return:
        source_nodes: list
        target_nodes: list
    """
    edge_index = graph["edge_index"]
    sources, targets = (tensor_to_list(edge_index[row, :]) for row in (0, 1))
    return sources, targets
def create_graph(graph, source_nodes, target_nodes):
    """Build a networkx graph from an edge list and prepare an empty K matrix.

    Parameters
    ----------
    graph: object indexable by ``"edge_index"``
        Only used to collect the unique node ids that appear in
        ``edge_index``; nodes that appear in no edge are not included.
    source_nodes: list
    target_nodes: list
        Paired element-wise with ``source_nodes`` to form the edges.

    Return
    ------
    spl: dict
        All-pairs shortest-path lengths, ``spl[a][b]`` -> number of hops.
    k_matrix: np.ndarray
        Zero-filled array of shape (num_nodes, num_nodes).
    unique_nodes: list
        Unique node ids found in ``edge_index`` (sorted by ``np.unique``).
    """
    unique_nodes = list(np.unique(graph["edge_index"]))

    nx_graph = nx.Graph()
    nx_graph.add_nodes_from(unique_nodes)
    nx_graph.add_edges_from(zip(source_nodes, target_nodes))
    spl = dict(nx.all_pairs_shortest_path_length(nx_graph))

    size = len(unique_nodes)
    k_matrix = np.zeros((size, size))
    return spl, k_matrix, unique_nodes
def calculateLength(a, b, spl):
    """Look up the shortest-path length from ``a`` to ``b`` in ``spl``.

    Returns ``spl[a][b]``; if either key is missing (a ``KeyError``),
    returns 0 instead.
    """
    try:
        hops = spl[a][b]
    except KeyError:
        # No recorded path between the two nodes.
        hops = 0
    return hops
def save_data_csv(K, nodes_list, path="kMatrix_data.csv"):
    """Write the K matrix to a CSV file with one column per node.

    The previous implementation referenced an undefined ``graph_data``
    name (raising ``NameError`` on every call) in order to drop the first
    column; that drop is removed so the full matrix is saved.

    Args:
        K: 2-D numpy array; column ``i`` holds the values for
            ``nodes_list[i]``.
        nodes_list: list of node ids, used (as strings) for the CSV
            column headers.
        path: destination file; defaults to ``"kMatrix_data.csv"`` in the
            current working directory.

    Returns:
        None. The CSV is written with pandas' default integer row index
        as its first column.
    """
    # Label each matrix column with its node id, rendered as a string.
    columns = {f"{node}": K[:, col].tolist()
               for col, node in enumerate(nodes_list)}
    pd.DataFrame(columns).to_csv(path, sep=',')
def generate_k_matrix(initial_k_matrix,
                      unique_nodes,
                      spl,
                      k_hops=None):
    """Fill the K matrix with 1 where two nodes are within ``k_hops``.

    ``unique_nodes`` is sorted in place and ``initial_k_matrix`` is
    modified in place (the same array is returned). Entry ``[i, j]``
    becomes 1 when the looked-up path length between the i-th and j-th
    node is non-zero and at most ``k_hops``, otherwise 0. When ``k_hops``
    is ``None`` the matrix values are left as they were. In both cases the
    matrix is passed to ``save_data_csv`` before being returned.
    """
    unique_nodes.sort()
    if k_hops is not None:
        for r, src in enumerate(unique_nodes):
            for c, dst in enumerate(unique_nodes):
                hops = calculateLength(src, dst, spl)
                # A length of 0 means "same node" or "no recorded path".
                reachable = hops <= k_hops and hops != 0
                initial_k_matrix[r, c] = 1 if reachable else 0
    # Persist the matrix as CSV before handing it back.
    save_data_csv(initial_k_matrix, unique_nodes)
    return initial_k_matrix
def main():
    """Load one graph from the chosen dataset and build its K matrix.

    Reads the CLI options, loads the dataset from ``../datasets/<NAME>``,
    picks the graph at ``--graph_index``, generates the K matrix for
    ``--khops`` and prints the matrix and its shape.
    """
    args = read_args()
    NAME = args.name.upper()
    graphs = Dataset(f"{NAME}", save_dir=f"../datasets/{NAME}").return_dataset()
    selected_graph = graphs[args.graph_index]

    sources, targets = get_src_target_nodes(selected_graph)
    spl, kmatrix, unique_nodes = create_graph(selected_graph, sources, targets)
    kmatrix = generate_k_matrix(kmatrix, unique_nodes, spl, args.khops)

    print(f"K matrix generated: \n{kmatrix}")
    print(f'K matrix shape: {kmatrix.shape}')


if __name__ == "__main__":
    main()