From 194aaadc5556e2a53a76339e9bdb5b26e607a05a Mon Sep 17 00:00:00 2001
From: Thomas Christensen
Date: Mon, 18 Mar 2024 16:45:52 +0100
Subject: [PATCH] `all_simple_paths`: update PR #20

- this updates the port of https://github.com/sbromberger/LightGraphs.jl/pull/1540 from #20
- has a number of simplifications relative to original implementation
- original implementation by @i-aki-y
- cutoff now defaults to `nv(g)`

Co-authored-by: @i-aki-y
Co-authored-by: Etienne dg
---
 src/Graphs.jl                       |   7 +-
 src/traversals/all_simple_paths.jl  | 170 ++++++++++++++++++++++++++++
 test/runtests.jl                    |   1 +
 test/traversals/all_simple_paths.jl | 144 +++++++++++++++++++++++
 4 files changed, 321 insertions(+), 1 deletion(-)
 create mode 100644 src/traversals/all_simple_paths.jl
 create mode 100644 test/traversals/all_simple_paths.jl

diff --git a/src/Graphs.jl b/src/Graphs.jl
index 675db9eef..73b0a18c0 100644
--- a/src/Graphs.jl
+++ b/src/Graphs.jl
@@ -23,7 +23,8 @@ using DataStructures:
     union!,
     find_root!,
     BinaryMaxHeap,
-    BinaryMinHeap
+    BinaryMinHeap,
+    Stack
 using LinearAlgebra: I, Symmetric, diagm, eigen, eigvals, norm, rmul!, tril, triu
 import LinearAlgebra: Diagonal, issymmetric, mul!
 using Random:
@@ -196,6 +197,9 @@ export
 
     # eulerian
     eulerian,
+
+    # all simple paths
+    all_simple_paths,
 
     # coloring
     greedy_color,
@@ -496,6 +500,7 @@ include("traversals/maxadjvisit.jl")
 include("traversals/randomwalks.jl")
 include("traversals/diffusion.jl")
 include("traversals/eulerian.jl")
+include("traversals/all_simple_paths.jl")
 include("connectivity.jl")
 include("distance.jl")
 include("editdist.jl")
diff --git a/src/traversals/all_simple_paths.jl b/src/traversals/all_simple_paths.jl
new file mode 100644
index 000000000..9a1c42a8f
--- /dev/null
+++ b/src/traversals/all_simple_paths.jl
@@ -0,0 +1,170 @@
+"""
+    all_simple_paths(g, u, v; cutoff=nv(g))  --> Graphs.SimplePathIterator
+    all_simple_paths(g, u, vs; cutoff=nv(g)) --> Graphs.SimplePathIterator
+
+Return an iterator that generates all simple paths in the graph `g` from a source vertex
+`u` to a target vertex `v` or to any vertex in an iterable of target vertices `vs`.
+
+The iterator's elements (i.e., the paths) can be materialized via `collect` or `iterate`.
+Paths are iterated in the order of a depth-first search. Every path is a vector of
+vertices that starts at `u` and ends at a target vertex; the trivial path `[u]` is never
+generated, even if `u` is itself a target vertex.
+
+## Keyword arguments
+The maximum path length (i.e., number of edges) is limited by the keyword argument `cutoff`
+(default, `nv(g)`). If a path's length is greater than `cutoff`, it is omitted; in
+particular, no paths are generated if `cutoff` is less than 1.
+
+## Examples
+```jldoctest allsimplepaths; setup = :(using Graphs)
+julia> g = complete_graph(4);
+
+julia> spi = all_simple_paths(g, 1, 4)
+SimplePathIterator{SimpleGraph{Int64}}(1 → 4)
+
+julia> collect(spi)
+5-element Vector{Vector{Int64}}:
+ [1, 2, 3, 4]
+ [1, 2, 4]
+ [1, 3, 2, 4]
+ [1, 3, 4]
+ [1, 4]
+```
+We can restrict the search to paths of length at most a specified cut-off (here, 2 edges):
+```jldoctest allsimplepaths; setup = :(using Graphs)
+julia> collect(all_simple_paths(g, 1, 4; cutoff=2))
+3-element Vector{Vector{Int64}}:
+ [1, 2, 4]
+ [1, 3, 4]
+ [1, 4]
+```
+"""
+function all_simple_paths(
+    g::AbstractGraph{T}, u::Integer, vs; cutoff::Integer=nv(g)
+) where {T<:Integer}
+    # A simple path has fewer than `nv(g)` edges, so restricting `cutoff` to `0:nv(g)`
+    # leaves the result unchanged and guarantees that the value is representable as a `T`.
+    n = nv(g)
+    maxlen = cutoff < 0 ? zero(T) : (cutoff > n ? T(n) : T(cutoff))
+    targets = vs isa Set{T} ? vs : Set{T}(vs)
+    return SimplePathIterator(g, T(u), targets, maxlen)
+end
+
+"""
+    SimplePathIterator{T <: Integer, G <: AbstractGraph{T}}
+
+Iterator that generates all simple paths in `g` from `u` to `vs` of a length at most
+`cutoff`.
+"""
+struct SimplePathIterator{T<:Integer,G<:AbstractGraph{T}}
+    g::G
+    u::T       # start vertex
+    vs::Set{T} # target vertices
+    cutoff::T  # max length of resulting paths
+end
+
+function Base.show(io::IO, spi::SimplePathIterator)
+    print(io, "SimplePathIterator{", typeof(spi.g), "}(", spi.u, " → ")
+    if length(spi.vs) == 1
+        print(io, only(spi.vs))
+    else
+        print(io, '[')
+        join(io, spi.vs, ", ")
+        print(io, ']')
+    end
+    print(io, ')')
+    return nothing
+end
+Base.IteratorSize(::Type{<:SimplePathIterator}) = Base.SizeUnknown()
+# defined on the type, so that both `eltype(spi)` and `eltype(typeof(spi))` are informative
+Base.eltype(::Type{<:SimplePathIterator{T}}) where {T} = Vector{T}
+
+mutable struct SimplePathIteratorState{T<:Integer}
+    stack::Stack{Vector{T}} # used to restore iteration of child vertices; each vector has
+                            # two elements: a parent vertex and an index of children
+    visited::Stack{T}       # current path candidate
+    queued::Vector{T}       # remaining targets if path length reached cutoff
+end
+function SimplePathIteratorState(spi::SimplePathIterator{T}) where {T<:Integer}
+    stack = Stack{Vector{T}}()
+    visited = Stack{T}()
+    queued = Vector{T}()
+    if spi.cutoff >= 1 # with `cutoff < 1` no path can exist: start with an exhausted state
+        push!(visited, spi.u)         # add a starting vertex to the path candidate
+        push!(stack, [spi.u, one(T)]) # add a child node with index 1
+    end
+    return SimplePathIteratorState{T}(stack, visited, queued)
+end
+
+function _stepback!(state::SimplePathIteratorState) # updates iterator state.
+    pop!(state.stack)
+    pop!(state.visited)
+    return nothing
+end
+
+"""
+    Base.iterate(spi::SimplePathIterator{T}, state=SimplePathIteratorState(spi))
+
+Return the next simple path in `spi`, according to a depth-first search, together with the
+updated iteration state, or `nothing` if no paths remain.
+"""
+function Base.iterate(
+    spi::SimplePathIterator{T}, state::SimplePathIteratorState=SimplePathIteratorState(spi)
+) where {T<:Integer}
+    while !isempty(state.stack)
+        if !isempty(state.queued) # consume queued targets
+            target = pop!(state.queued)
+            result = vcat(reverse(collect(state.visited)), target)
+            if isempty(state.queued)
+                _stepback!(state)
+            end
+            return result, state
+        end
+
+        parent_node, next_child_index = first(state.stack) # frame at the top of the stack
+        children = outneighbors(spi.g, parent_node)
+        if length(children) < next_child_index
+            _stepback!(state) # all children have been checked, step back
+            continue
+        end
+
+        child = children[next_child_index]
+        first(state.stack)[2] += 1 # move child index forward
+        child in state.visited && continue
+
+        if length(state.visited) == spi.cutoff
+            # the path candidate has `cutoff - 1` edges, so it can only be extended by a
+            # single edge: queue the not-yet-checked children that are targets, in
+            # reverse, such that `pop!` emits them in the order of the children
+            rest_children = children[next_child_index:end]
+            state.queued = reverse!([
+                v for v in rest_children if v in spi.vs && !(v in state.visited)
+            ])
+
+            if isempty(state.queued)
+                _stepback!(state)
+            end
+        else
+            result = if child in spi.vs
+                vcat(reverse(collect(state.visited)), child)
+            else
+                nothing
+            end
+
+            # update state variables
+            push!(state.visited, child) # move to child vertex
+            if !isempty(setdiff(spi.vs, state.visited))
+                # targets remain off the path: use the child as a parent in the next iteration
+                push!(state.stack, [child, one(T)])
+            else
+                # all targets are on the path: step back and explore the remaining children
+                pop!(state.visited)
+            end
+
+            if !isnothing(result) # found a new path, return it
+                return result, state
+            end
+        end
+    end
+    return nothing
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index df304b74f..1764fe537 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -109,6 +109,7 @@ tests = [
     "traversals/randomwalks",
     "traversals/diffusion",
     "traversals/eulerian",
+    "traversals/all_simple_paths",
     "community/cliques",
     "community/core-periphery",
     "community/label_propagation",
diff --git a/test/traversals/all_simple_paths.jl b/test/traversals/all_simple_paths.jl
new file mode 100644
index 000000000..3e851743b
--- /dev/null
+++ b/test/traversals/all_simple_paths.jl
@@ -0,0 +1,144 @@
+@testset "All simple paths" begin
+    # single path
+    g = path_graph(4)
+    paths = all_simple_paths(g, 1, 4)
+    @test Set(p for p in paths) == Set([[1, 2, 3, 4]])
+    @test Set(collect(paths)) == Set([[1, 2, 3, 4]])
+    @test 1 == length(collect(paths))
+
+    # single path with cutoff: the only path has 3 edges and is pruned
+    @test isempty(collect(all_simple_paths(g, 1, 4; cutoff=2)))
+
+    # complete graph with cutoff: paths are generated in depth-first order
+    g = complete_graph(4)
+    @test collect(all_simple_paths(g, 1, 4; cutoff=2)) == [[1, 2, 4], [1, 3, 4], [1, 4]]
+
+    # several targets at the cutoff depth are emitted in the order of the neighbors
+    paths = all_simple_paths(g, 1, [3, 4]; cutoff=2)
+    @test collect(paths) == [[1, 2, 3], [1, 2, 4], [1, 3], [1, 3, 4], [1, 4], [1, 4, 3]]
+
+    # two paths
+    g = path_graph(4)
+    add_vertex!(g)
+    add_edge!(g, 3, 5)
+    paths = all_simple_paths(g, 1, [4, 5])
+    @test Set(p for p in paths) == Set([[1, 2, 3, 4], [1, 2, 3, 5]])
+    @test Set(collect(paths)) == Set([[1, 2, 3, 4], [1, 2, 3, 5]])
+    @test 2 == length(collect(paths))
+
+    # two paths with cutoff
+    g = path_graph(4)
+    add_vertex!(g)
+    add_edge!(g, 3, 5)
+    paths = all_simple_paths(g, 1, [4, 5], cutoff=3)
+    @test Set(p for p in paths) == Set([[1, 2, 3, 4], [1, 2, 3, 5]])
+
+    # two targets in line emits two paths
+    g = path_graph(4)
+    add_vertex!(g)
+    paths = all_simple_paths(g, 1, [3, 4])
+    @test Set(p for p in paths) == Set([[1, 2, 3], [1, 2, 3, 4]])
+
+    # two paths digraph
+    g = SimpleDiGraph(5)
+    add_edge!(g, 1, 2)
+    add_edge!(g, 2, 3)
+    add_edge!(g, 3, 4)
+    add_edge!(g, 3, 5)
+    paths = all_simple_paths(g, 1, [4, 5])
+    @test Set(p for p in paths) == Set([[1, 2, 3, 4], [1, 2, 3, 5]])
+
+    # two paths digraph with cutoff
+    g = SimpleDiGraph(5)
+    add_edge!(g, 1, 2)
+    add_edge!(g, 2, 3)
+    add_edge!(g, 3, 4)
+    add_edge!(g, 3, 5)
+    paths = all_simple_paths(g, 1, [4, 5], cutoff=3)
+    @test Set(p for p in paths) == Set([[1, 2, 3, 4], [1, 2, 3, 5]])
+
+    # digraph with a cycle
+    g = SimpleDiGraph(4)
+    add_edge!(g, 1, 2)
+    add_edge!(g, 2, 3)
+    add_edge!(g, 3, 1)
+    add_edge!(g, 2, 4)
+    paths = all_simple_paths(g, 1, 4)
+    @test Set(p for p in paths) == Set([[1, 2, 4]])
+
+    # digraph with a cycle; paths with two targets share a node in the cycle
+    g = SimpleDiGraph(4)
+    add_edge!(g, 1, 2)
+    add_edge!(g, 2, 3)
+    add_edge!(g, 3, 1)
+    add_edge!(g, 2, 4)
+    paths = all_simple_paths(g, 1, [3, 4])
+    @test Set(p for p in paths) == Set([[1, 2, 3], [1, 2, 4]])
+
+    # source equals targets
+    g = SimpleGraph(4)
+    paths = all_simple_paths(g, 1, 1)
+    @test Set(p for p in paths) == Set([])
+
+    # cutoff prunes paths
+    # note: the length of a path is its number of vertices minus 1
+    g = complete_graph(4)
+    paths = all_simple_paths(g, 1, 2; cutoff=1)
+    @test Set(p for p in paths) == Set([[1, 2]])
+
+    paths = all_simple_paths(g, 1, 2; cutoff=2)
+    @test Set(p for p in paths) == Set([[1, 2], [1, 3, 2], [1, 4, 2]])
+
+    # a cutoff smaller than 1 admits no paths
+    @test isempty(collect(all_simple_paths(g, 1, 2; cutoff=0)))
+
+    # the element type is available from the iterator and from its type
+    @test eltype(all_simple_paths(g, 1, 2)) == Vector{Int}
+    @test eltype(typeof(all_simple_paths(g, 1, 2))) == Vector{Int}
+
+    # the source vertex and cutoff need not have the vertex type of the graph
+    g = complete_graph(UInt8(4))
+    @test collect(all_simple_paths(g, 1, 4; cutoff=2)) == [[1, 2, 4], [1, 3, 4], [1, 4]]
+
+    # non trivial graph
+    g = SimpleDiGraph(6)
+    add_edge!(g, 1, 2)
+    add_edge!(g, 2, 3)
+    add_edge!(g, 3, 4)
+    add_edge!(g, 4, 5)
+
+    add_edge!(g, 1, 6)
+    add_edge!(g, 2, 6)
+    add_edge!(g, 2, 4)
+    add_edge!(g, 6, 5)
+    add_edge!(g, 5, 3)
+    add_edge!(g, 5, 4)
+
+    paths = all_simple_paths(g, 2, [3, 4])
+    @test Set(p for p in paths) == Set([
+        [2, 3],
+        [2, 4, 5, 3],
+        [2, 6, 5, 3],
+        [2, 4],
+        [2, 3, 4],
+        [2, 6, 5, 4],
+        [2, 6, 5, 3, 4],
+    ])
+
+    paths = all_simple_paths(g, 2, [3, 4], cutoff=3)
+    @test Set(p for p in paths) == Set([
+        [2, 3],
+        [2, 4, 5, 3],
+        [2, 6, 5, 3],
+        [2, 4],
+        [2, 3, 4],
+        [2, 6, 5, 4],
+    ])
+
+    paths = all_simple_paths(g, 2, [3, 4], cutoff=2)
+    @test Set(p for p in paths) == Set([
+        [2, 3],
+        [2, 4],
+        [2, 3, 4],
+    ])
+end