diff --git a/ext/NautyACSetsExt.jl b/ext/NautyACSetsExt.jl index 43edb34..be72688 100644 --- a/ext/NautyACSetsExt.jl +++ b/ext/NautyACSetsExt.jl @@ -4,8 +4,12 @@ using ACSets using nauty_jll """Compute CSetNautyRes from an ACSet.""" -function ACSets.call_nauty(g::ACSet)::CSetNautyRes - ACSets.NautyInterface.parse_res(nauty_res(g), g) +function ACSets.call_nauty(g::ACSet; use_nauty=true)::CSetNautyRes + if Sys.iswindows() || !use_nauty + CSetAutomorphisms.to_nauty_res(g) + else + ACSets.NautyInterface.parse_res(nauty_res(g), g) + end end """Make shell command to dreadnaut (nauty) and collect stdout text.""" diff --git a/src/ACSets.jl b/src/ACSets.jl index 405cc52..ff3770e 100644 --- a/src/ACSets.jl +++ b/src/ACSets.jl @@ -14,7 +14,7 @@ include("DenseACSets.jl") include("intertypes/InterTypes.jl") include("serialization/Serialization.jl") include("ADTs.jl") -include("NautyInterface.jl") +include("nauty/Nauty.jl") @reexport using .ColumnImplementations: AttrVar @reexport using .Schemas @@ -23,6 +23,6 @@ include("NautyInterface.jl") @reexport using .InterTypes @reexport using .ACSetSerialization using .ADTs -@reexport using .NautyInterface +@reexport using .Nauty end diff --git a/src/nauty/CSetAutomorphisms.jl b/src/nauty/CSetAutomorphisms.jl new file mode 100644 index 0000000..3092ab8 --- /dev/null +++ b/src/nauty/CSetAutomorphisms.jl @@ -0,0 +1,275 @@ +""" +This is code which is required because nauty_jll is not supported on Windows: +thus we need a makeshift implementation of Nauty within Julia. There is +potential to do this in a much cleaner and more efficient way with a virtual +machine, but for now performance is not a priority. +""" +module CSetAutomorphisms + +using ...ACSetInterface, ...DenseACSets, ...Schemas +using ..NautyInterface +using Permutations + + +# Color assigned to each elem of each component +const CDict = Dict{Symbol, Vector{Int}} + +"""Construct permutation σ⁻¹ such that σσ⁻¹=id""" +invert_perms(x::CDict) = Dict([k=>Base.invperm(v) for (k, v) in collect(x)]) + +check_auto(x::CDict)::Bool = all(Base.isperm, values(x)) + +# Sequence of keys, to index a Tree +const VPSI = Vector{Pair{Symbol, Int}} + +max0(x::Vector{Int})::Int = isempty(x) ? 0 : maximum(x) + +include("ColorRefine.jl") + +# CSets with attributes replaced w/ combinatorial representatives +################################################################# + +""" +To compute automorphisms of Attributed CSets, we create a pseudo CSet which has +additional components for each data type. + +This is inefficient for attributes which have a total order on them +(e.g. integers/strings) since we solve for a canonical permutation of the +attributes. Future work could address this by initializing the coloring with +the 'correct' canonical order. +""" +function pseudo_cset(g::ACSet)::Tuple{ACSet, Dict{Symbol,Vector{Any}}} + # Create copy of schema (+ an extra component for each datatype) + S = acset_schema(g) + pres = deepcopy(S) + append!(pres.obs, pres.attrtypes) + append!(pres.homs, pres.attrs) + empty!.([pres.attrtypes, pres.attrs]) + + # Use Julia ordering to give each value an index + attrvals = Dict(map(attrtypes(S)) do at + vals = Set{Any}() + [union!(vals, g[a]) for a in attrs(S; just_names=true, to=at)] + at => vcat(filter(x->!(x isa AttrVar), vals) |> collect |> sort, + AttrVar.(parts(g, at))) + end) + + # Create and populate pseudo-cset + res = AnonACSet(pres, index=arrows(S; just_names=true)) + + copy_parts!(res, g) + + # Replace data value with an index for each attribute + for t in attrtypes(S) + add_parts!(res, t, length(attrvals[t]) - nparts(g, t)) + for (a,d,_) in attrs(S; to=t) + for p in parts(g, d) + res[p, a] = findfirst(==(g[p, a]), attrvals[t]) + end + end + end + + (res, attrvals) +end + +""" +Inverse of pseudo_cset. Requires mapping (generated by `pseudo_cset`) of indices +for each Data to the actual data values. +""" +function pseudo_cset_inv(g::ACSet, orig::ACSet, attrvals::AbstractDict) + S = acset_schema(orig) + orig = deepcopy(orig) + for arr in hom(S) + orig[arr] = g[arr] + end + for (darr,_,tgt) in attrs(S) + orig[darr] = attrvals[tgt][g[darr]] + end + orig +end + +# Results +######### + +"""Apply a coloring to a C-set to get an isomorphic cset""" +function apply_automorphism(c::ACSet, d::CDict) + check_auto(d) || error("received coloring that is not an automorphism: $d") + new = deepcopy(c) + for (arr, src, tgt) in homs(acset_schema(c)) + new[d[src], arr] = d[tgt][c[arr]] + end + for (arr, src, _) in attrs(acset_schema(c)) + new[d[src], arr] = c[arr] + end + new +end + +function to_nauty_res(g::ACSet) + p, avals = pseudo_cset(g) + c, m = [pseudo_cset_inv(apply_automorphism(p, Dict(a)), g, avals) => a + for a in autos(p)[1]] |> sort |> first + strhsh = string(c) + orbits = Dict{Symbol, Vector{Int}}() # todo + generators = Pair{Int, Vector{Permutation}}[] # todo + CSetNautyRes(strhsh, orbits, generators, 0, m, c) +end + +# Trees +####### + +"""Find index at which two vectors diverge (used in `search_tree`)""" +function common(v1::Vector{T}, v2::Vector{T})::Int where {T} + for (i, (x, y)) in enumerate(zip(v1, v2)) + x == y || return i - 1 + end + min(length(v1), length(v2)) +end + +""" +Search tree explored by Nauty. Each node has an input coloring, a refined +coloring, and a set of children indexed by which element (in the smallest +nontrivial orbit) has its symmetry artificially broken. +""" +struct Tree + coloring::CDict + saturated::CDict + children::Dict{Pair{Symbol, Int}, Tree} + Tree() = new(CDict(), CDict(), Dict{Pair{Symbol, Int}, Tree}()) +end + +"""Get a node via a sequence of edges from the root""" +function Base.getindex(t::Tree, pth::VPSI)::Tree + ptr = t + for p in pth + ptr = ptr.children[p] + end + ptr +end + +"""Automorphism based pruning when we've found a new leaf node (τ @ t)""" +function compute_auto_prune(tree::Tree, t::VPSI, τ::CDict, leafnodes::Set{VPSI})::Set{VPSI} + skip = Set{VPSI}() + for p in filter(!=(t), leafnodes) + π = tree[p].saturated + i = common(p, t) + _, _, c = abc = p[1:i], p[1:i+1], t[1:i+1] # == t[1:i] + a_, b_, c_ = [tree[x].saturated for x in abc] + γ = compose_perms(π, invert_perms(τ)) + if (compose_perms(γ, a_) == a_ && compose_perms(γ, b_) == c_) + # skip everything from c to a + for i in length(c):length(t) + push!(skip, t[1:i]) + end + break + end + end + filter(!=(t), skip) # has something gone wrong if we need to do this? +end + +""" +Get vector listing nontrivial colors (which component and which color index) as +well as how many elements have that color. E.g. for (V=[1,1,2], E=[1,2,2,2,3,3]) +we would get `[2=>(:V,1), 3=>(:E,2), 2=>(:E, 3)]` +""" +function get_colors_by_size(coloring::CDict)::Vector{Pair{Int,Tuple{Symbol, Int}}} + res = [] + for (k, v) in coloring + for color in 1:max0(v) + n_c = count(==(color), v) + n_c > 1 && push!(res, n_c => (k, color)) # Store which table and which color + end + end + res +end + + +"""To reduce branching factor, split on the SMALLEST nontrivial partition""" +function split_data(coloring::CDict)::Tuple{Symbol, Int, Vector{Int}} + colors_by_size = sort(get_colors_by_size(coloring), rev=false) + isempty(colors_by_size) && return :_nothing, 0, [] + split_tab, split_color = colors_by_size[1][2] + colors = coloring[split_tab] + split_inds = findall(==(split_color), colors) + (split_tab, split_color, split_inds) +end + +""" +DFS tree of colorings, with edges being choices in how to break symmetry +Goal is to acquire all leaf nodes. + +Algorithm from "McKay’s Canonical Graph Labeling Algorithm" by Hartke and +Radcliffe (2009). + +McKay's "Practical Graph Isomorphism" (Section 2.29: "storage of identity +nodes") warns that it's not a good idea to check for every possible automorphism +pruning (for memory and time concerns). To do: look into doing this in a more +balanced way. Profiling code will probably reveal that checking for automorphism +pruning is a bottleneck. + +Inputs: + - g: our structure that we are computing automorphisms for + - res: all automorphisms found so far + - split_seq: sequence of edges (our current location in the tree) + - tree: all information known so far - this gets modified + - leafnodes: coordinates of all automorphisms found so far + - skip: flagged coordinates which have been pruned +""" +function search_tree!(g::ACSet, init_coloring::CDict, split_seq::VPSI, + tree::Tree, leafnodes::Set{VPSI}, skip::Set{VPSI}) + curr_tree = tree[split_seq] + # Perform color saturation + coloring = color_saturate(g; init_color=init_coloring) + for (k, v) in pairs(coloring) + curr_tree.coloring[k] = init_coloring[k] + curr_tree.saturated[k] = v + end + + split_tab, _, split_inds = split_data(coloring) + + # Check if we are now at a leaf node + if isempty(split_inds) + # Add result to list of results + push!(leafnodes, split_seq) + check_auto(coloring) # fail if not a perm + + # Prune with automorphisms + pruned = compute_auto_prune(tree, split_seq, coloring, leafnodes) + isempty(pruned) || union!(skip, pruned) + else + # Branch on this leaf + for split_ind in split_inds + if split_ind == split_inds[1] + # Construct arguments for recursive call to child + new_coloring = deepcopy(coloring) + new_seq = vcat(split_seq, [split_tab => split_ind]) + new_coloring[split_tab][split_ind] = maximum(coloring[split_tab]) + 1 + curr_tree.children[split_tab => split_ind] = Tree() + search_tree!(g, new_coloring, new_seq, tree, leafnodes, skip) + end + end + end +end + +"""Get coordinates of all nodes in a tree that have no children""" +function get_leaves(t::Tree)::Vector{VPSI} + if isempty(t.children) + [Pair{Symbol,Int}[]] + else + res = [] + for (kv, c) in t.children + for pth in get_leaves(c) + push!(res, vcat([kv],pth)) + end + end + res + end +end + +"""Compute the automorphisms of a CSet""" +function autos(g::ACSet)::Tuple{Set{CDict}, Tree} + tree, leafnodes = Tree(), Set{VPSI}() + search_tree!(g, nocolor(g), VPSI(), tree, leafnodes,Set{VPSI}()) + Set([tree[ln].saturated for ln in leafnodes]), tree +end + +end # module diff --git a/src/nauty/ColorRefine.jl b/src/nauty/ColorRefine.jl new file mode 100644 index 0000000..97971a8 --- /dev/null +++ b/src/nauty/ColorRefine.jl @@ -0,0 +1,96 @@ + +using StructEquality +using .....ColumnImplementations: AttrVar + +""" +Data for an individual component (each vector corresponds to its elements) +1.) how many of each color (for each in-arrow) targets this point +2.) what color this point targets (for each out arrow) + +This could be extended to add extra automorphism-invariant properties. +E.g. detecting if src+tgt both point to the same element +""" +@struct_hash_equal struct CDataPoint + indata::Vector{Vector{Int}} + outdata::Vector{Int} +end + +"""Data required to color a CSet (each element of each component)""" +const CData = Dict{Symbol, Vector{CDataPoint}} + +""" +Computes colors for a CSet, distinguishing nodes by their immediate +connectivity. It is not sufficient to compute the automorphism group, but it is +a good starting point. + +This does not generalize to ACSets. We cannot naively throw the attributes as +raw data into the color data. It will make indistinguishable elements (e.g. two +elements that map to different data but otherwise can be permuted) as +distinguishable. +""" +function compute_color_data(g::ACSet, color::CDict)::CData + S = acset_schema(g) + res = CData() + for tab in ob(S) # compute colordata for each tab + subres = map(homs(S; to=tab)) do (arr, src, _) # vector for each in-arrow + color_src = color[src] + subsubres = zeros(Int, nparts(g, tab), max0(color_src)) + for (colorsrc, arrtgt) in zip(color_src, g[arr]) + subsubres[arrtgt, colorsrc] += 1 + end + subsubres + end + + # Also compute per-element data for table `tgt` (now, regard as a src) + out_subres = map(homs(S; from=tab)) do (oga, _, tgt) + color[tgt][g[oga]] + end + + # Combine the two pieces of data for each elmeent in tgt, store in res + res[tab] = map(parts(g,tab)) do i + CDataPoint([ssr[i,:] for ssr in subres], [osr[i] for osr in out_subres]) + end + end + res +end + +"""Initial state for tree search: every element is symmetric""" +nocolor(g::ACSet) = + CDict([k => ones(Int, nparts(g, k)) for k in ob(acset_schema(g))]) + +""" +Iterative color refinement based on the number (and identity) of incoming and +outgoing arrows. +Inputs: + - g: CSet we are color saturating + - init_color: initial coloring, if any (default: uniform) +Returns: + - trajectory of colorings +""" +function color_saturate(g::ACSet; init_color::Union{Nothing,CDict}=nothing) + # Default: uniform coloring + new_color = isnothing(init_color) ? nocolor(g) : init_color + + prev_n, curr_n, iter = 0, 1, 0 + hashes = Dict{Symbol, Vector{UInt}}() + while prev_n != curr_n + iter += 1 + prev_color = new_color + # All that matters about newdata's type is that it is hashable + newdata = compute_color_data(g, prev_color) + # Distinguish by both color AND newly computed color data + new_datahash = Dict{Symbol, Vector{UInt}}( + [k=>map(hash, zip(prev_color[k],v)) for (k, v) in collect(newdata)]) + # Identify set of new colors for each component + hashes = Dict{Symbol, Vector{UInt}}( + [k=>sort(collect(Set(v))) for (k, v) in new_datahash]) + # Assign new colors by hash value of color+newdata + new_color = CDict([ + k=>[findfirst(==(new_datahash[k][i]), hashes[k]) + for i in 1:nparts(g, k)] + for (k, v) in new_datahash]) + prev_n = sum(map(max0, values(prev_color))) + curr_n = sum(map(max0, values(new_color))) + end + new_color +end diff --git a/src/nauty/Nauty.jl b/src/nauty/Nauty.jl new file mode 100644 index 0000000..dce996c --- /dev/null +++ b/src/nauty/Nauty.jl @@ -0,0 +1,11 @@ +module Nauty + +using Reexport + +include("NautyInterface.jl") +include("CSetAutomorphisms.jl") + +@reexport using .CSetAutomorphisms +@reexport using .NautyInterface + +end # module diff --git a/src/NautyInterface.jl b/src/nauty/NautyInterface.jl similarity index 98% rename from src/NautyInterface.jl rename to src/nauty/NautyInterface.jl index 9e577b8..78595d4 100644 --- a/src/NautyInterface.jl +++ b/src/nauty/NautyInterface.jl @@ -3,8 +3,9 @@ module NautyInterface export NautyRes, CSetNautyRes, call_nauty, all_autos, canon, orbits, canonmap, strhsh, ngroup -using ..Schemas -using ..DenseACSets, ..ACSetInterface +using ...Schemas +using ...DenseACSets +using ...ACSetInterface import AlgebraicInterfaces: generators using DataStructures: OrderedSet, DefaultDict using Permutations @@ -105,8 +106,7 @@ function parse_res(res::String, g::ACSet)::CSetNautyRes end # parse permutation for the canonical graph rng = match(reg_perm, res[sec : end]) - # cp = CPerm(oinds, [parse(Int, x) for x in split(strip(rng.match), r"\s+")], S) - # canonoffset = Dict([k=>canon[v].-(v.start-2) for (k,v) in oinds if k ∈ ob(S)]) + cp = Dict(map(ob(S)) do o canon = [parse(Int, x) for x in split(strip(rng.match), r"\s+")] o=>Permutation(canon[oinds[o]].-(oinds[o].start-2)) diff --git a/test/nauty/CSetAutomorphisms.jl b/test/nauty/CSetAutomorphisms.jl new file mode 100644 index 0000000..7b17b97 --- /dev/null +++ b/test/nauty/CSetAutomorphisms.jl @@ -0,0 +1,68 @@ +module TestCSetAutomorphisms + +using Test +using ACSets +using Permutations + +using ACSets.CSetAutomorphisms: to_nauty_res + +SchGraph = BasicSchema([:E,:V], [(:src,:E,:V),(:tgt,:E,:V)]) + +@acset_type Graph(SchGraph, index=[:src,:tgt]) + +@test CSetNautyRes(Graph()).canon == Graph() + +""" +Add a graph with 5 vertices and 7 edges to an existing graph. This is one +of two isomorphic graphs, controlled by the boolean argument. +""" +function addV5E7!(G::Graph=Graph(), flag::Bool=true) + vs = add_parts!(G,:V, 5) + s, t = (flag ? ([1,2,3,2,3,1,4],[2,3,1,1,2,3,5]) + : ([2,3,1,3,1,2,5],[3,1,2,2,3,1,4])) + add_parts!(G, :E, 7; src=vs[s], tgt=vs[t]) + G +end + +G = addV5E7!() +H = addV5E7!(Graph(), false) + +@test allequal(canon.(to_nauty_res.([G, H]))) + +# Attributes + +SchDecGraph = BasicSchema([:E,:V], [(:src,:E,:V),(:tgt,:E,:V)], + [:X], [(:dec,:E,:X)]) + +@acset_type Labeled(SchDecGraph) + +G = @acset Labeled{String} begin + V = 4; E = 4; src = [1,2,3,4]; tgt = [2,3,4,1]; dec = ["a","b","c","d"] +end; +K = @acset Labeled{String} begin + V = 4; E = 4; src = [2,3,4,1]; tgt = [3,4,1,2]; dec = ["b","c","d","a"] +end; +H = @acset Labeled{String} begin + V = 4; E = 4; src = [3,2,4,1]; tgt = [2,4,1,3]; dec = ["b","c","d","a"] +end; + +@test allequal(canon.(to_nauty_res.([G, K, H]))) + +# AttrVars + +G = @acset Labeled{String} begin + V = 4; E = 4; X=2; src = [1,2,3,4]; tgt = [2,3,4,1]; + dec = ["a", AttrVar.(1:2)...,"d"] +end; +K = @acset Labeled{String} begin + V = 4; E = 4; X=2;src = [2,3,4,1]; tgt = [3,4,1,2]; + dec = [reverse(AttrVar.(1:2))...,"d","a"] +end; +H = @acset Labeled{String} begin + V = 4; E = 4; X=2; src = [3,2,4,1]; tgt = [2,4,1,3]; + dec = [reverse(AttrVar.(1:2))...,"d","a"] +end; + +@test allequal(canon.(to_nauty_res.([G, K, H]))) + +end # module diff --git a/test/NautyInterface.jl b/test/nauty/NautyInterface.jl similarity index 98% rename from test/NautyInterface.jl rename to test/nauty/NautyInterface.jl index e8b2540..ae4c4e7 100644 --- a/test/NautyInterface.jl +++ b/test/nauty/NautyInterface.jl @@ -15,7 +15,7 @@ function iso(X::T, Y::T, comps::Dict{Symbol, Vector{Int}}) where {T <: ACSet} all(isperm, values(comps)) || return false all(((h, c, cd),) -> comps[cd][X[h]] == Y[h][comps[c]], homs(acset_schema(X))) end -iso(X,R::CSetNautyRes) = iso(X, canon(R), canonmap(R)) +iso(X, R::CSetNautyRes) = iso(X, canon(R), canonmap(R)) # Graphs ######## diff --git a/test/runtests.jl b/test/runtests.jl index 9d2698c..a8c660b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -33,6 +33,10 @@ end end @testset "Nauty" begin - include("NautyInterface.jl") + include("nauty/NautyInterface.jl") +end + +@testset "Nauty" begin + include("nauty/CSetAutomorphisms.jl") end