diff --git a/src/ProbabilisticCircuits.jl b/src/ProbabilisticCircuits.jl
index f3fe217b..19975095 100644
--- a/src/ProbabilisticCircuits.jl
+++ b/src/ProbabilisticCircuits.jl
@@ -53,6 +53,10 @@ include("structurelearner/init.jl")
 include("structurelearner/heuristics.jl")
 include("structurelearner/learner.jl")
 include("structurelearner/vtree_learner.jl")
+include("structurelearner/sample_psdd.jl")
+
+include("ensembles/ensembles.jl")
+include("ensembles/bmc.jl")
 
 include("LoadSave/LoadSave.jl")
 @reexport using .LoadSave
diff --git a/src/structurelearner/sample_psdd.jl b/src/structurelearner/sample_psdd.jl
index ccf0c55d..052d6466 100644
--- a/src/structurelearner/sample_psdd.jl
+++ b/src/structurelearner/sample_psdd.jl
@@ -120,11 +120,17 @@ Samples a PSDD from a BDD `ϕ` and vtree `V` with at most `k` elements in each d
 @inline function sample_psdd(ϕ::Diagram, V::Vtree, k::Integer, D::DataFrame; opts::SamplingOpts = full,
         randomize_weights::Bool = false, pseudocount::Real = 1.0, fact_on_⊤::Bool = false,
         ⊤_k::Integer = k, p_mr::Real = 0.5, always_compress::Bool = false,
-        always_merge::Bool = false, merge_branch::Real = 0.0)::StructProbCircuit
+        always_merge::Bool = false, merge_branch::Real = 0.0, maxiter::Integer = 0)::StructProbCircuit
     memo = Dict{Tuple{Vtree, Diagram}, StructSumNode}()
     C = sample_psdd_r(ϕ, V, k, Dict{Int32, StructProbLiteralNode}(), randomize_weights, opts, fact_on_⊤,
                       ⊤_k, p_mr, always_compress, always_merge, memo, merge_branch > 0.0, merge_branch,
                       false, false)
+    if maxiter > 0
+        # Optionally grow the circuit by Strudel.
+        loss(x) = heuristic_loss(x, D)
+        C = struct_learn(C; primitives = [split_step], kwargs = Dict(split_step => (loss = loss,)),
+                         maxiter, verbose = false)
+    end
     !randomize_weights && estimate_parameters(C, D; pseudocount)
     return C
 end
diff --git a/test/Project.toml b/test/Project.toml
index 4e02b373..17e03a04 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,5 +1,6 @@
 [deps]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+BinaryDecisionDiagrams = "cd45f48d-2bff-4983-a793-756749635bd1"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
diff --git a/test/structurelearner/sample_psdd_tests.jl b/test/structurelearner/sample_psdd_tests.jl
new file mode 100644
index 00000000..e01abf6e
--- /dev/null
+++ b/test/structurelearner/sample_psdd_tests.jl
@@ -0,0 +1,39 @@
+using Test
+using ProbabilisticCircuits
+using DataFrames
+using BinaryDecisionDiagrams
+import LogicCircuits: Vtree
+
+@testset "SamplePSDD tests" begin
+    # Set up a logic constraint ϕ as a BDD and scope size n. Sample m PSDDs.
+    function case(ϕ::Diagram, n::Integer; m::Integer = 20, atol::Real = 0)
+        # All possible valuations (including impossible ones).
+        M = all_valuations(collect(1:n))
+        # Get only possible worlds.
+        W = M[findall(ϕ.(eachrow(M))),:]
+        # Assign random probabilities for each world in W.
+        R = rand(1:20, size(W, 1))
+        # Construct a dataset that maps the distribution of R (world W[i] repeats R[i] times).
+        D = DataFrame(vcat([repeat(W[i,:], 1, R[i])' for i ∈ 1:size(W, 1)]...))
+        # Learn PSDDs from ϕ and D. Overfit them so that we can use ≈ without Julia complaining.
+        C = Vector{StructProbCircuit}(undef, m)
+        Threads.@threads for i ∈ 1:m
+            C[i] = sample_psdd(ϕ, Vtree(n, :random), 16, D; pseudocount = 0.0, maxiter = 100)
+        end
+        T = DataFrame(M)
+        for i ∈ 1:m
+            # Test consistency.
+            @test (EVI(C[i], T) .> -Inf) == ϕ.(eachrow(M))
+            # Test probabilities.
+            evi = exp.(EVI(C[i], T))
+            @test isapprox(evi[findall(>(0), evi)], (R/sum(R)); atol)
+        end
+    end
+
+    case((1 ∧ 2) ∨ (3 ∧ ¬4) ∨ (¬1 ∧ 5), 5)
+    case((1 → 3) ∧ (5 → ¬2), 5)
+    case(and(1, 2, 3) ∨ and(4, 5), 5)
+    case(exactly(3, collect(1:5)), 5)
+    case(atleast(3, collect(1:5)), 5)
+    case(atmost(3, collect(1:5)), 5)
+end