Code excerpts studied this week (reference: https://blog.csdn.net/u012151283/article/details/87255951); they cover computing the pairwise structural distances and building the multi-layer weighted graph:
```python
def _get_order_degreelist_node(self, root, max_num_layers=None):
    # BFS from `root`; for each layer (hop distance) record the ordered
    # degree sequence of the vertices in that layer.
    if max_num_layers is None:
        max_num_layers = float('inf')

    ordered_degree_sequence_dict = {}
    visited = [False] * len(self.graph.nodes())
    queue = deque()
    level = 0
    queue.append(root)
    visited[root] = True

    while (len(queue) > 0 and level <= max_num_layers):
        count = len(queue)
        if self.opt1_reduce_len:
            degree_list = {}
        else:
            degree_list = []
        while (count > 0):
            top = queue.popleft()
            node = self.idx2node[top]
            degree = len(self.graph[node])

            if self.opt1_reduce_len:
                # compressed form: count how often each degree occurs
                degree_list[degree] = degree_list.get(degree, 0) + 1
            else:
                degree_list.append(degree)

            for nei in self.graph[node]:
                nei_idx = self.node2idx[nei]
                if not visited[nei_idx]:
                    visited[nei_idx] = True
                    queue.append(nei_idx)
            count -= 1

        if self.opt1_reduce_len:
            ordered_degree_list = [(degree, freq)
                                   for degree, freq in degree_list.items()]
            ordered_degree_list.sort(key=lambda x: x[0])
        else:
            ordered_degree_list = sorted(degree_list)
        ordered_degree_sequence_dict[level] = ordered_degree_list
        level += 1

    return ordered_degree_sequence_dict
```
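As a quick check of the output format (my own worked example): run from the centre vertex of a star graph with three leaves, this returns {0: [3], 1: [1, 1, 1]} (the root's own degree at layer 0, the three leaves' degrees at layer 1), or {0: [(3, 1)], 1: [(1, 3)]} in the compressed (degree, count) form when opt1_reduce_len is enabled.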
```python
def _compute_ordered_degreelist(self, max_num_layers):
    degreeList = {}
    vertices = self.idx  # self.g.nodes()
    for v in vertices:
        degreeList[v] = self._get_order_degreelist_node(v, max_num_layers)
    return degreeList
```
```python
def compute_dtw_dist(part_list, degreeList, dist_func):
    dtw_dist = {}
    for v1, nbs in part_list:
        lists_v1 = degreeList[v1]  # ordered degree list of v1
        for v2 in nbs:
            lists_v2 = degreeList[v2]  # ordered degree list of v2
            max_layer = min(len(lists_v1), len(lists_v2))  # valid layers
            dtw_dist[v1, v2] = {}
            for layer in range(0, max_layer):
                # DTW distance between the two layer-wise degree sequences
                dist, path = fastdtw(
                    lists_v1[layer], lists_v2[layer], radius=1, dist=dist_func)
                dtw_dist[v1, v2][layer] = dist
    return dtw_dist
```
```python
def _compute_structural_distance(self, max_num_layers, workers=1, verbose=0,):
    if os.path.exists(self.temp_path + 'structural_dist.pkl'):
        structural_dist = pd.read_pickle(
            self.temp_path + 'structural_dist.pkl')
    else:
        if self.opt1_reduce_len:
            dist_func = cost_max
        else:
            dist_func = cost

        if os.path.exists(self.temp_path + 'degreelist.pkl'):
            degreeList = pd.read_pickle(self.temp_path + 'degreelist.pkl')
        else:
            degreeList = self._compute_ordered_degreelist(max_num_layers)
            pd.to_pickle(degreeList, self.temp_path + 'degreelist.pkl')

        if self.opt2_reduce_sim_calc:
            # only compare each vertex against a reduced candidate set
            degrees = self._create_vectors()
            degreeListsSelected = {}
            vertices = {}
            n_nodes = len(self.idx)
            for v in self.idx:
                nbs = get_vertices(
                    v, len(self.graph[self.idx2node[v]]), degrees, n_nodes)
                vertices[v] = nbs  # store candidate vertices for v
                degreeListsSelected[v] = degreeList[v]
                for n in nbs:
                    # also keep the degree lists of the candidates
                    degreeListsSelected[n] = degreeList[n]
        else:
            # compare every pair (v, vd) with vd > v exactly once
            vertices = {}
            for v in degreeList:
                vertices[v] = [vd for vd in degreeList.keys() if vd > v]

        results = Parallel(n_jobs=workers, verbose=verbose,)(
            delayed(compute_dtw_dist)(part_list, degreeList, dist_func)
            for part_list in partition_dict(vertices, workers))
        dtw_dist = dict(ChainMap(*results))

        structural_dist = convert_dtw_struc_dist(dtw_dist)
        pd.to_pickle(structural_dist, self.temp_path + 'structural_dist.pkl')

    return structural_dist
```
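The element-wise DTW cost functions `cost` and `cost_max` chosen above are not included in the excerpt. Based on the degree-comparison distance max(a, b)/min(a, b) − 1 described in the paper, they presumably look something like the sketch below (the 0.5 smoothing offset and the exact handling of the compressed (degree, count) tuples are my assumptions, not taken from the source):

```python
from fastdtw import fastdtw


def cost(a, b):
    # a, b are plain degrees (opt1_reduce_len disabled)
    ep = 0.5  # offset so a degree of 0 does not make the ratio blow up
    m, mi = max(a, b) + ep, min(a, b) + ep
    return (m / mi) - 1


def cost_max(a, b):
    # a, b are (degree, count) tuples (opt1_reduce_len enabled);
    # the count of the more frequent degree scales the cost
    ep = 0.5
    m, mi = max(a[0], b[0]) + ep, min(a[0], b[0]) + ep
    return ((m / mi) - 1) * max(a[1], b[1])


# usage: DTW distance between two per-layer ordered degree sequences
dist, _ = fastdtw([1, 2, 2, 3], [1, 3, 4], radius=1, dist=cost)
```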
```python
def _get_transition_probs(self, layers_adj, layers_distances):
    layers_alias = {}
    layers_accept = {}

    for layer in layers_adj:
        neighbors = layers_adj[layer]
        layer_distances = layers_distances[layer]
        node_alias_dict = {}
        node_accept_dict = {}
        norm_weights = {}

        for v, neighbors in neighbors.items():
            e_list = []
            sum_w = 0.0
            for n in neighbors:
                if (v, n) in layer_distances:
                    wd = layer_distances[v, n]
                else:
                    wd = layer_distances[n, v]
                w = np.exp(-float(wd))  # edge weight w_k(u, v) = exp(-f_k(u, v))
                e_list.append(w)
                sum_w += w

            e_list = [x / sum_w for x in e_list]  # normalize per vertex
            norm_weights[v] = e_list
            accept, alias = create_alias_table(e_list)
            node_alias_dict[v] = alias
            node_accept_dict[v] = accept

        pd.to_pickle(
            norm_weights,
            self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl')

        layers_alias[layer] = node_alias_dict
        layers_accept[layer] = node_accept_dict

    return layers_accept, layers_alias
```
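`create_alias_table` comes from the reference implementation's utility module and is not shown in the excerpt. A minimal self-contained sketch of the alias method it implements, together with the matching O(1) sampling step (`alias_sample` is my own name for it), could look like this:

```python
import random


def create_alias_table(area_ratio):
    # area_ratio: probabilities of the events, normalized to sum to 1
    n = len(area_ratio)
    accept, alias = [0.0] * n, [0] * n
    small, large = [], []
    scaled = [p * n for p in area_ratio]
    for i, p in enumerate(scaled):
        (small if p < 1.0 else large).append(i)
    while small and large:
        s, l = small.pop(), large.pop()
        accept[s] = scaled[s]           # keep s with probability scaled[s]
        alias[s] = l                    # otherwise fall through to l
        scaled[l] -= 1.0 - scaled[s]    # l donated the remaining mass
        (small if scaled[l] < 1.0 else large).append(l)
    while large:
        accept[large.pop()] = 1.0
    while small:
        accept[small.pop()] = 1.0
    return accept, alias


def alias_sample(accept, alias):
    # draw one index in O(1) time
    i = random.randint(0, len(accept) - 1)
    return i if random.random() < accept[i] else alias[i]
```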
```python
def prepare_biased_walk(self,):
    sum_weights = {}
    sum_edges = {}
    average_weight = {}
    gamma = {}
    layer = 0
    # file name must match the one written by _get_transition_probs (including '.pkl')
    while (os.path.exists(self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl')):
        probs = pd.read_pickle(
            self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl')
        for v, list_weights in probs.items():
            sum_weights.setdefault(layer, 0)
            sum_edges.setdefault(layer, 0)
            sum_weights[layer] += sum(list_weights)
            sum_edges[layer] += len(list_weights)

        average_weight[layer] = sum_weights[layer] / sum_edges[layer]

        gamma.setdefault(layer, {})
        for v, list_weights in probs.items():
            # gamma[layer][v]: number of v's edges in this layer whose weight
            # is above the layer's average edge weight
            num_neighbours = 0
            for w in list_weights:
                if (w > average_weight[layer]):
                    num_neighbours += 1
            gamma[layer][v] = num_neighbours

        layer += 1

    pd.to_pickle(average_weight, self.temp_path + 'average_weight')
    pd.to_pickle(gamma, self.temp_path + 'gamma.pkl')
```
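The average_weight and gamma values prepared here drive the cross-layer moves of the biased random walk. As I understand the struc2vec paper, at each step the walker stays in the current layer with some probability q (moving to a neighbour v with probability proportional to the normalized w_k(u, v) = e^{-f_k(u, v)} computed above), and otherwise switches layers according to

$$ w(u_k, u_{k+1}) = \log(\Gamma_k(u) + e), \qquad w(u_k, u_{k-1}) = 1, $$

$$ p_k(u_k, u_{k+1}) = \frac{w(u_k, u_{k+1})}{w(u_k, u_{k+1}) + w(u_k, u_{k-1})}, \qquad p_k(u_k, u_{k-1}) = 1 - p_k(u_k, u_{k+1}), $$

where Γ_k(u) is exactly the per-layer count stored in gamma: the number of edges incident to u in layer k whose weight exceeds that layer's average edge weight.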
Tasks for this week: compute structural similarity across the different layers, and study the struc2vec paper.
What was actually done:
1. Follow-up on last week's work
Last week the demo was run locally on Windows; to support the next steps, this week I switched to setting up a virtual machine and running the project in Docker. Problems encountered: building the virtual machine, basic environment configuration (Maven, Spark, Java, etc.), pulling the project to the local machine with Git, and actually running it. Having had no prior exposure to this area, I lost a fair amount of time on these steps.
2. Further study of the paper, which basically breaks down into the following steps (see https://zhuanlan.zhihu.com/p/56733145 for reference):
a. Algorithm principle: Struc2Vec defines vertex similarity from the point of view of structural similarity in space.
b. Definition of the distance between pairs of vertices (see the formulas after this list).
c. Construction of the multi-layer weighted graph.
d. Sampling to obtain vertex sequences.
e. Optimizations.
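To make steps b and c concrete, the definitions I took away from the paper (restated here in my own notation) are the following. Let R_k(u) be the set of vertices at hop distance exactly k from u and s(S) the ordered degree sequence of a vertex set S; the layer-k structural distance is defined recursively as

$$ f_k(u, v) = f_{k-1}(u, v) + g\big(s(R_k(u)),\, s(R_k(v))\big), \qquad f_{-1}(u, v) = 0, $$

with g(·, ·) the DTW distance between the two sequences (computed with fastdtw in the code above). The multi-layer weighted graph of step c then connects every pair u, v within layer k by an edge of weight

$$ w_k(u, v) = e^{-f_k(u, v)}, $$

so that structurally similar pairs receive large weights, and corresponding vertices in adjacent layers are linked so that the random walk of step d can move between layers.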
3. Study of a reference implementation of the algorithm
Reference: https://blog.csdn.net/u012151283/article/details/87255951
a. Implementation of the distance definition (see _get_order_degreelist_node, _compute_ordered_degreelist, compute_dtw_dist and _compute_structural_distance above).
b. Construction of the multi-layer weighted graph (see _get_transition_probs and prepare_biased_walk above).
4. Goal for next week: building on the study above, use Angel to actually implement the construction of the multi-layer weighted graph, and try to build the full multi-layer weighted network.