Skip to content
This repository has been archived by the owner on Oct 8, 2021. It is now read-only.

Reduced time & memory footprint for Tarjan's algorithm, fixed a bug where it was O(E^2) on star graphs. #1559

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 50 additions & 53 deletions src/connectivity.jl
Original file line number Diff line number Diff line change
Expand Up @@ -219,31 +219,31 @@ julia> strongly_connected_components(g)

function strongly_connected_components end
# see https://github.com/mauro3/SimpleTraits.jl/issues/47#issuecomment-327880153 for syntax
@traitfn function strongly_connected_components(g::AG::IsDirected) where {T<:Integer, AG <: AbstractGraph{T}}
# Build an empty per-vertex store holding values of `type` for the graph `g`.
# Generic vertex types get a dictionary keyed by vertex; a vertex that has no
# entry yet is simply absent from it.
function empty_graph_data(type, g::AG) where {T, AG <: AbstractGraph{T}}
    return Dict{T, type}()
end

# Integer vertex types get a dense, zero-filled vector with one slot per vertex
# (`nv(g)` entries), so lookups are plain array indexing.
function empty_graph_data(type, g::AG) where {T <: Integer, AG <: AbstractGraph{T}}
    return zeros(type, nv(g))
end
# Vector-backed store: the vertex `v` counts as unvisited while its slot still
# holds a zero value.
function is_unvisited(data::AbstractVector, v::Integer)
    return iszero(data[v])
end

# Dict-backed store: the vertex `v` counts as unvisited while it has no entry.
function is_unvisited(data::Dict, v)
    return !haskey(data, v)
end

@traitfn function strongly_connected_components_modified(g::AG::IsDirected) where {T, AG <: AbstractGraph{T}}
zero_t = zero(T)
one_t = one(T)
nvg = nv(g)
count = one_t


index = zeros(T, nvg) # first time in which vertex is discovered
stack = Vector{T}() # stores vertices which have been discovered and not yet assigned to any component
onstack = zeros(Bool, nvg) # true while a vertex is waiting in the stack to receive a component assignment
lowlink = zeros(T, nvg) # lowest index vertex that it can reach through back edge (index array not vertex id number)
parents = zeros(T, nvg) # parent of every vertex in dfs
count = 1 # Visitation order for the branch being explored. Backtracks when we pop an scc.
component_count = nvg - 1 # Reversed Index of the current component being discovered.
# Invariant: count is always smaller than component_count.
# This lets us tell if a node belongs to a previously discovered scc without any extra bits.

component_root = empty_graph_data(Bool,g)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
component_root = empty_graph_data(Bool,g)
is_component_root = empty_graph_data(Bool,g)

A Dict of type Dict{T, Bool} can usually be replaced by a Set{T}. In this case, you could also think about using a Vector{Bool} or a BitVector, although with these you will waste some space in case we only have a few strongly connected components.

Copy link
Author

@saolof saolof Apr 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, it's using a Vector{Bool} for now whenever T is an Integer. I'm planning to do some benchmarking with BitVector once I have a decent solution to make the loop non-quadratic.

The main issue with using anything that doesn't give you a pointer to a bool is that the boolean gets manipulated in a tight loop, which is fast because of branch prediction/speculative execution. But of course, adding a variable outside the loop and then storing its result is an option.

rindex = empty_graph_data(Int,g) # The arrays should not be T-valued for integer T, the rindexes should be the same type as nvg
components = Vector{Vector{T}}() # maintains a list of scc (order is not guaranteed in API)


stack = Vector{T}() # stores vertices which have been discovered and not yet assigned to any component
dfs_stack = Vector{T}()

@inbounds for s in vertices(g)
if index[s] == zero_t
index[s] = count
lowlink[s] = count
onstack[s] = true
parents[s] = s
push!(stack, s)
count = count + one_t
if is_unvisited(rindex,s)
rindex[s] = count
component_root[s] = true
count += 1

# start dfs from 's'
push!(dfs_stack, s)
Expand All @@ -252,66 +252,63 @@ function strongly_connected_components end
v = dfs_stack[end] #end is the most recently added item
u = zero_t
@inbounds for v_neighbor in outneighbors(g, v)
if index[v_neighbor] == zero_t
# unvisited neighbor found
if is_unvisited(rindex,v_neighbor)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if is_unvisited(rindex,v_neighbor)
if is_unvisited(rindex, v_neighbor)

In general, I would suggest adding a space after each comma in argument lists for better readability and consistency with the other code.

u = v_neighbor
break
#GOTO A push u onto DFS stack and continue DFS
elseif onstack[v_neighbor]
# we have already seen n, but can update the lowlink of v
# which has the effect of possibly keeping v on the stack until n is ready to pop.
# update lowest index 'v' can reach through out neighbors
lowlink[v] = min(lowlink[v], index[v_neighbor])
# TODO: This is accidentally quadratic for tournament graphs or star graphs.
# Breaking the for loop to resume it from the beginning when returning leads to issues for graphs with high node orders like the star graph.
# One option is to save the iteration state in a third stack, but there may be other approaches.
elseif (rindex[v_neighbor] < rindex[v])
rindex[v] = rindex[v_neighbor]
component_root[v] = false
end
end
if u == zero_t
saolof marked this conversation as resolved.
Show resolved Hide resolved
# All out neighbors already visited or no out neighbors
# we have fully explored the DFS tree from v.
# time to start popping.
popped = pop!(dfs_stack)
lowlink[parents[popped]] = min(lowlink[parents[popped]], lowlink[popped])

if index[v] == lowlink[v]
# found a cycle in a completed dfs tree.
component = Vector{T}()

while !isempty(stack) #break when popped == v
# drain stack until we see v.
# everything on the stack until we see v is in the SCC rooted at v.
popped = pop!(stack)
push!(component, popped)
onstack[popped] = false
# popped has been assigned a component, so we will never see it again.
if popped == v
# we have drained the stack of an entire component.
break
end
if component_root[popped] # Found an SCC rooted at popped which is a bottom cycle in remaining graph.
component = T[popped]
count -= 1 # We also backtrack the count to reset it to what it would be if the component were never in the graph.
while !isempty(stack) && (rindex[popped] <= rindex[stack[end]]) # Keep popping its children from the backtracking stack.
newpopped = pop!(stack)
rindex[newpopped] = component_count # Bigger than the value of anything unexplored.
push!(component, newpopped) # popped has been assigned a component, so we will never see it again.
count -=1
end
rindex[popped] = component_count
component_count -= 1
push!(components,component)
else # Invariant: the DFS stack can never be empty in this second branch where popped is not a root.
if (rindex[popped] < rindex[dfs_stack[end]])
rindex[dfs_stack[end]] = rindex[popped]
component_root[dfs_stack[end]] = false
end

reverse!(component)
push!(components, component)
# Because we only push to stack when backtracking, it gets filled up less than in Tarjan's original algorithm.
# For DAG inputs, the stack variable never gets touched at all.
push!(stack,popped)
end

else #LABEL A
# add unvisited neighbor to dfs
index[u] = count
lowlink[u] = count
onstack[u] = true
parents[u] = v
count = count + one_t

push!(stack, u)
push!(dfs_stack, u)
component_root[u] = true
rindex[u] = count
count += 1
# next iteration of while loop will expand the DFS tree from u.
end
end
end
end

# Unlike in the original Tarjan's algorithm, the rindex values are potentially also worth returning here.
# Lowlink values are topologically sorted in the same order as components.
# Scipy's graph library returns only that and lets the user sort by its values.
return components
end


"""
strongly_connected_components_kosaraju(g)

Expand Down