JuliaGraphs · thchr · Nov 14, 2024 · Nov 14, 2024 · Nov 14, 2024 · Nov 14, 2024
diff --git a/src/Graphs.jl b/src/Graphs.jl
@@ -210,6 +210,8 @@ export
 
     # connectivity
     connected_components,
+    connected_components!,
+    count_connected_components,
     strongly_connected_components,
     strongly_connected_components_kosaraju,
     strongly_connected_components_tarjan,

diff --git a/src/connectivity.jl b/src/connectivity.jl
@@ -1,26 +1,33 @@
 # Parts of this code were taken / derived from Graphs.jl. See LICENSE for
 # licensing details.
 """
-    connected_components!(label, g)
+    connected_components!(label, g, [search_queue])
 
 Fill `label` with the `id` of the connected component in the undirected graph
 `g` to which it belongs. Return a vector representing the component assigned
 to each vertex. The component value is the smallest vertex ID in the component.
 
-### Performance
+## Optional arguments
+- `search_queue`, an empty `Vector{eltype(edgetype(g))}`, can be provided to avoid
+   reallocating this work array repeatedly on repeated calls of `connected_components!`.
+   If not provided, it is automatically instantiated.
+
+## Performance
 This algorithm is linear in the number of edges of the graph.
 """
-function connected_components!(label::AbstractVector, g::AbstractGraph{T}) where {T}
+function connected_components!(
+    label::AbstractVector{T}, g::AbstractGraph{T}, search_queue::Vector{T}=Vector{T}()
+) where {T}
+    empty!(search_queue)
     for u in vertices(g)
         label[u] != zero(T) && continue
         label[u] = u
-        Q = Vector{T}()
-        push!(Q, u)
-        while !isempty(Q)
-            src = popfirst!(Q)
+        push!(search_queue, u)
+        while !isempty(search_queue)
+            src = popfirst!(search_queue)
             for vertex in all_neighbors(g, src)
                 if label[vertex] == zero(T)
-                    push!(Q, vertex)
+                    push!(search_queue, vertex)
                     label[vertex] = u
                 end
             end
@@ -129,9 +136,74 @@ julia> is_connected(g)
 true
 ```
 """
-function is_connected(g::AbstractGraph)
+function is_connected(g::AbstractGraph{T}) where {T}
     mult = is_directed(g) ? 2 : 1
-    return mult * ne(g) + 1 >= nv(g) && length(connected_components(g)) == 1
+    if mult * ne(g) + 1 >= nv(g) # necessary edge-count condition; skip search otherwise
+        label = zeros(T, nv(g)) # zero marks "unvisited" for `connected_components!`
+        connected_components!(label, g)
+        return !isempty(label) && allequal(label) # zero vertices => false, as before
+    else
+        return false
+    end
+end
+
+"""
+    count_connected_components(g, [label, search_queue]; reset_label::Bool=false)
+
+Return the number of connected components in `g`.
+
+Equivalent to `length(connected_components(g))` but uses fewer allocations by not
+materializing the component vectors explicitly.
+
+## Optional arguments
+Mutated work arrays, `label` and `search_queue`, can be provided to avoid allocating these
+arrays repeatedly on repeated calls of `count_connected_components`.
+For `g :: AbstractGraph{T}`, `label` must be a zero-initialized `Vector{T}` of length
+`nv(g)` and `search_queue` a `Vector{T}`. See also [`connected_components!`](@ref).
+
+## Keyword arguments
+- `reset_label :: Bool` (default: `false`): if `true`, `label` is reset to a zero-vector
+  before returning.
+
+## Example
+```
+julia> using Graphs
+
+julia> g = Graph(Edge.([1=>2, 2=>3, 3=>1, 4=>5, 5=>6, 6=>4, 7=>8]));
+
+julia> connected_components(g)
+3-element Vector{Vector{Int64}}:
+ [1, 2, 3]
+ [4, 5, 6]
+ [7, 8]
+
+julia> count_connected_components(g)
+3
+```
+"""
+function count_connected_components(
+    g::AbstractGraph{T},
+    label::AbstractVector{T}=zeros(T, nv(g)),
+    search_queue::Vector{T}=Vector{T}();
+    reset_label::Bool=false,
+) where {T}
+    connected_components!(label, g, search_queue)
+    c = count_unique(label)
+    reset_label && fill!(label, zero(eltype(label)))
+    return c
+end
+
+function count_unique(label::AbstractVector{T}) where {T}
+    # Count the distinct entries of `label`. Like `length(Set(label))` but faster, since
+    # `Set(label)` sizehints aggressively, assuming mostly-unique elements, which is rarely
+    # the case for caller `count_connected_components` (which passes any `AbstractVector`)
+    seen = T === Int ? BitSet() : Set{T}() # if `T=Int`, we can use faster BitSet
+    for l in label
+        # faster than direct `push!(seen, l)` when `label` has few unique elements relative
+        # to `length(label)`
+        l ∉ seen && push!(seen, l)
+    end
+    return length(seen)
 end
 
 """

diff --git a/test/operators.jl b/test/operators.jl
@@ -268,6 +268,7 @@
     for i in 3:4
         @testset "Tensor Product: $g" for g in testgraphs(path_graph(i))
             @test length(connected_components(tensor_product(g, g))) == 2
+            @test count_connected_components(tensor_product(g, g)) == 2
         end
     end
 

diff --git a/test/spanningtrees/boruvka.jl b/test/spanningtrees/boruvka.jl
@@ -21,14 +21,18 @@
         g1t = GenericGraph(SimpleGraph(edges1))
         @test res1.weight == cost_mst
         # acyclic graphs have n - c edges
-        @test nv(g1t) - length(connected_components(g1t)) == ne(g1t)
+        @test nv(g1t) - ne(g1t) ==
+            length(connected_components(g1t)) ==
+            count_connected_components(g1t)
         @test nv(g1t) == nv(g)
 
         res2 = boruvka_mst(g, distmx; minimize=false)
         edges2 = [Edge(src(e), dst(e)) for e in res2.mst]
         g2t = GenericGraph(SimpleGraph(edges2))
         @test res2.weight == cost_max_vec_mst
-        @test nv(g2t) - length(connected_components(g2t)) == ne(g2t)
+        @test nv(g2t) - ne(g2t) ==
+            length(connected_components(g2t)) ==
+            count_connected_components(g2t)
         @test nv(g2t) == nv(g)
     end
     # second test
@@ -60,14 +64,18 @@
         edges3 = [Edge(src(e), dst(e)) for e in res3.mst]
         g3t = GenericGraph(SimpleGraph(edges3))
         @test res3.weight == weight_vec2
-        @test nv(g3t) - length(connected_components(g3t)) == ne(g3t)
+        @test nv(g3t) - ne(g3t) ==
+            length(connected_components(g3t)) ==
+            count_connected_components(g3t)
         @test nv(g3t) == nv(gx)
 
         res4 = boruvka_mst(g, distmx_sec; minimize=false)
         edges4 = [Edge(src(e), dst(e)) for e in res4.mst]
         g4t = GenericGraph(SimpleGraph(edges4))
         @test res4.weight == weight_max_vec2
-        @test nv(g4t) - length(connected_components(g4t)) == ne(g4t)
+        @test nv(g4t) - ne(g4t) ==
+            length(connected_components(g4t)) ==
+            count_connected_components(g4t)
         @test nv(g4t) == nv(gx)
     end
 
@@ -123,14 +131,18 @@
         edges5 = [Edge(src(e), dst(e)) for e in res5.mst]
         g5t = GenericGraph(SimpleGraph(edges5))
         @test res5.weight == weight_vec3
-        @test nv(g5t) - length(connected_components(g5t)) == ne(g5t)
+        @test nv(g5t) - ne(g5t) ==
+            length(connected_components(g5t)) ==
+            count_connected_components(g5t)
         @test nv(g5t) == nv(gd)
 
         res6 = boruvka_mst(g, distmx_third; minimize=false)
         edges6 = [Edge(src(e), dst(e)) for e in res6.mst]
         g6t = GenericGraph(SimpleGraph(edges6))
         @test res6.weight == weight_max_vec3
-        @test nv(g6t) - length(connected_components(g6t)) == ne(g6t)
+        @test nv(g6t) - ne(g6t) ==
+            length(connected_components(g6t)) ==
+            count_connected_components(g6t)
         @test nv(g6t) == nv(gd)
     end
 end