import sys
import time
import networkx as nx
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import functions
from graphframes import GraphFrame
from collections import defaultdict
from copy import deepcopy
sc = SparkContext("local", "degree.py")
sqlContext = SQLContext(sc)
# Python program to find articulation points in an undirected graph
# This class represents an undirected graph
# using adjacency list representation
class Graph:

    def __init__(self, vertices):
        self.V = vertices                 # No. of vertices
        self.graph = defaultdict(list)    # default dictionary to store the graph
        self.Time = 0

    # function to add an edge to the graph
    def addEdge(self, u, v):
        self.graph[u].append(v)
        self.graph[v].append(u)

    def APUtil(self, u, visited, ap, parent, low, disc):
        '''A recursive function that finds articulation points
        using DFS traversal.
        u         --> the vertex to be visited next
        visited[] --> keeps track of visited vertices
        disc[]    --> stores discovery times of visited vertices
        parent[]  --> stores parent vertices in the DFS tree
        ap[]      --> stores articulation points'''

        # Count of children of the current node in the DFS tree
        children = 0

        # Mark the current node as visited
        visited[u] = True

        # Initialize discovery time and low value
        disc[u] = self.Time
        low[u] = self.Time
        self.Time += 1

        # Recur for all the vertices adjacent to this vertex
        for v in self.graph[u]:
            # If v is not visited yet, then make it a child of u
            # in the DFS tree and recur for it
            if not visited[v]:
                parent[v] = u
                children += 1
                self.APUtil(v, visited, ap, parent, low, disc)

                # Check if the subtree rooted with v has a connection to
                # one of the ancestors of u
                low[u] = min(low[u], low[v])

                # u is an articulation point in the following cases:
                # (1) u is the root of the DFS tree and has two or more children.
                if parent[u] == -1 and children > 1:
                    ap[u] = True

                # (2) u is not the root and the low value of one of its children
                # is greater than or equal to the discovery value of u.
                if parent[u] != -1 and low[v] >= disc[u]:
                    ap[u] = True

            # Update low value of u for parent function calls
            elif v != parent[u]:
                low[u] = min(low[u], disc[v])

    # The function to do DFS traversal. It uses the recursive APUtil().
    def AP(self):

        # Mark all the vertices as not visited
        # and initialize the parent, visited,
        # and ap (articulation point) arrays
        visited = [False] * self.V
        disc = [float("Inf")] * self.V
        low = [float("Inf")] * self.V
        parent = [-1] * self.V
        ap = [False] * self.V  # To store articulation points

        # Call the recursive helper function to find articulation points
        # in the DFS tree rooted with vertex 'i'
        for i in range(self.V):
            if not visited[i]:
                self.APUtil(i, visited, ap, parent, low, disc)

        for index, value in enumerate(ap):
            if value:
                print(index, end=' ')
# Build a few small example graphs and print their articulation points
g1 = Graph(5)
g1.addEdge(1, 0)
g1.addEdge(0, 2)
g1.addEdge(2, 1)
g1.addEdge(0, 3)
g1.addEdge(3, 4)
print "\nArticulation points in first graph "
g1.AP()
g2 = Graph(4)
g2.addEdge(0, 1)
g2.addEdge(1, 2)
g2.addEdge(2, 3)
print "\nArticulation points in second graph "
g2.AP()
g3 = Graph(7)
g3.addEdge(0, 1)
g3.addEdge(1, 2)
g3.addEdge(2, 0)
g3.addEdge(1, 3)
g3.addEdge(1, 4)
g3.addEdge(1, 6)
g3.addEdge(3, 5)
g3.addEdge(4, 5)
print "\nArticulation points in third graph "
g3.AP()
# This code is contributed by Neelam Yadav
def articulations(g, usegraphframe=False):
    # Get the starting count of connected components
    # YOUR CODE HERE

    # GraphFrame version: sparkifies the connected components computation
    # and serializes node iteration.
    if usegraphframe:
        # Get the vertex list for serial iteration
        # YOUR CODE HERE

        # For each vertex, generate a new graphframe missing that vertex
        # and calculate the connected component count. Then append the count
        # to the output.
        # YOUR CODE HERE
        pass

    # Default (non-GraphFrame) version: sparkifies node iteration and uses
    # networkx for the connected components count.
    else:
        # YOUR CODE HERE
        pass
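
# The block below is a minimal, illustrative sketch of one way articulations()
# could be filled in; it is not part of the assignment skeleton. The helper
# name articulations_sketch and the output columns 'id'/'articulation' are
# assumptions. Both branches count connected components before and after
# dropping each vertex and flag a vertex when the count increases. The
# GraphFrame branch assumes sc.setCheckpointDir() has been called, since
# connectedComponents() may require a checkpoint directory.
def articulations_sketch(g, usegraphframe=False):
    if usegraphframe:
        # Serial iteration over vertices, Spark/GraphFrames for component counts.
        base = g.connectedComponents().select('component').distinct().count()
        out = []
        for row in g.vertices.collect():
            vid = row.id
            # Rebuild the graph without this vertex and its incident edges.
            v2 = g.vertices.filter(g.vertices.id != vid)
            e2 = g.edges.filter((g.edges.src != vid) & (g.edges.dst != vid))
            cc = GraphFrame(v2, e2).connectedComponents()
            count = cc.select('component').distinct().count()
            out.append((vid, 1 if count > base else 0))
        return sqlContext.createDataFrame(out, ['id', 'articulation'])
    else:
        # Spark iteration over vertices, networkx on each worker for counts.
        edges = [(row.src, row.dst) for row in g.edges.collect()]
        nxg = nx.Graph()
        nxg.add_edges_from(edges)
        base = nx.number_connected_components(nxg)

        def check(vid):
            h = nxg.copy()
            h.remove_node(vid)
            return (vid, 1 if nx.number_connected_components(h) > base else 0)

        result = g.vertices.rdd.map(lambda row: check(row.id)).collect()
        return sqlContext.createDataFrame(result, ['id', 'articulation'])
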
filename = sys.argv[1]
lines = sc.textFile(filename)
pairs = lines.map(lambda s: s.split(","))
e = sqlContext.createDataFrame(pairs,['src','dst'])
e = e.unionAll(e.selectExpr('src as dst','dst as src')).distinct() # Ensure undirectedness
# Extract all endpoints from input file and make a single column frame.
v = e.selectExpr('src as id').unionAll(e.selectExpr('dst as id')).distinct()
# Create graphframe from the vertices and edges.
g = GraphFrame(v,e)
# Runtime: approximately 5 minutes
print("---------------------------")
print("Processing graph using Spark iteration over nodes and serial (networkx) connectedness calculations")
init = time.time()
df = articulations(g, False)
print("Execution time: %s seconds" % (time.time() - init))
print("Articulation points:")
df.filter('articulation = 1').show(truncate=False)
print("---------------------------")
# Runtime for the GraphFrame version below is more than 2 hours
print("Processing graph using serial iteration over nodes and GraphFrame connectedness calculations")
init = time.time()
df = articulations(g, True)
print("Execution time: %s seconds" % (time.time() - init))
print("Articulation points:")
df.filter('articulation = 1').show(truncate=False)