From 0b56db08e51d751a58307a9d3764fb85cc11d024 Mon Sep 17 00:00:00 2001 From: Renato Lui Geh Date: Thu, 24 Jun 2021 09:16:22 -0300 Subject: [PATCH] Add Ensemble I/O --- Project.toml | 1 + src/LoadSave/circuit_loaders.jl | 44 +++++++++++++++++++++- src/LoadSave/circuit_savers.jl | 29 +++++++++++++- src/ensembles/bmc.jl | 3 +- src/ensembles/ensembles.jl | 4 +- src/structurelearner/sample_psdd.jl | 7 ++-- test/ensembles/bmc_tests.jl | 17 +++++---- test/ensembles/ensembles_tests.jl | 36 +++++++++++++----- test/structurelearner/sample_psdd_tests.jl | 17 +++++---- 9 files changed, 125 insertions(+), 33 deletions(-) diff --git a/Project.toml b/Project.toml index 7ec5c207..db3b73b4 100644 --- a/Project.toml +++ b/Project.toml @@ -28,6 +28,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" ThreadPools = "b189fb0b-2eb5-4ed4-bc0c-d34c51242431" TikzGraphs = "b4f28e30-c73f-5eaf-a395-8a9db949a742" +ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [compat] CUDA = "2, 3.0" diff --git a/src/LoadSave/circuit_loaders.jl b/src/LoadSave/circuit_loaders.jl index 84c58b28..f68e49f7 100644 --- a/src/LoadSave/circuit_loaders.jl +++ b/src/LoadSave/circuit_loaders.jl @@ -1,5 +1,5 @@ export zoo_clt, zoo_clt_file, zoo_psdd, zoo_lc, load_prob_circuit, -load_struct_prob_circuit, load_logistic_circuit +load_struct_prob_circuit, load_logistic_circuit, load_as_ensemble using LogicCircuits using Pkg.Artifacts @@ -96,3 +96,45 @@ function parse_clt(filename::String)::MetaDiGraph return clt end +"Loads an ensemble of structured probabilistic circuits from the `.esbl` zip archive `name`: its `.meta` entry holds the circuit count and the weights, and entries `j.psdd`/`j.vtree` hold the `j`-th circuit and its vtree. `quiet = true` suppresses progress output." +function load_as_ensemble(name::String; quiet::Bool = false)::Ensemble{StructProbCircuit} + @assert endswith(name, ".esbl") + zip = ZipFile.Reader(name) + W, n = Vector{Float64}(), -1 + for f ∈ zip.files + if endswith(f.name, ".meta") + n = parse(Int, readline(f)) + W = map(x -> parse(Float64, x), split(readline(f))) + end + end + @assert n > 0 && length(W) == n "Ensemble file format corrupted, empty or missing meta file."
+ P = Tuple{Int, Int}[(0, 0) for i ∈ 1:n] + for (i, f) ∈ enumerate(zip.files) + if endswith(f.name, ".psdd") + j = parse(Int, f.name[1:end-5]) + @assert j > 0 && j <= n "Either .meta file is corrupted or .psdd is misnamed (faulty: $(f.name))." + P[j] = (i, P[j][2]) + elseif endswith(f.name, ".vtree") + j = parse(Int, f.name[1:end-6]) + @assert j > 0 && j <= n "Either .meta file is corrupted or .vtree is misnamed (faulty: $(f.name))." + P[j] = (P[j][1], i) + end + end + C = Vector{StructProbCircuit}(undef, n) + function do_work(k::Int, i::Int, j::Int) + @assert i > 0 "Missing .psdd file for the $k-th circuit." + @assert j > 0 "Missing .vtree file for the $k-th circuit." + psdd_file, vtree_file = zip.files[i], zip.files[j] + psdd, _ = load_struct_prob_circuit(psdd_file, vtree_file) + C[k] = psdd + nothing + end + !quiet && print("Loading circuits...\n ") + for (k, (i, j)) ∈ enumerate(P) + do_work(k, i, j) + !quiet && print('*') + end + !quiet && print('\n') + close(zip) + return Ensemble{StructProbCircuit}(C, W) +end diff --git a/src/LoadSave/circuit_savers.jl b/src/LoadSave/circuit_savers.jl index ccfb8545..ba36dfc3 100644 --- a/src/LoadSave/circuit_savers.jl +++ b/src/LoadSave/circuit_savers.jl @@ -1,5 +1,6 @@ -export save_circuit, save_as_dot, save_as_psdd, save_as_logistic +export save_circuit, save_as_dot, save_as_psdd, save_as_logistic, save_as_ensemble +using ZipFile using LogicCircuits.LoadSave: SDDElement, PSDDElement, save_lines, @@ -195,3 +196,29 @@ function save_as_dot(file::String, circuit::ProbCircuit) flush(f) close(f) end + +"Saves `ensemble` as a `.esbl` zip archive at `name`: an `ensemble.meta` entry (circuit count, then space-separated weights) plus `i.psdd` and `i.vtree` entries for the `i`-th circuit. `quiet = true` suppresses progress output."
+function save_as_ensemble(name::String, ensemble::Ensemble{StructProbCircuit}; quiet::Bool = false) + @assert endswith(name, ".esbl") + zip = ZipFile.Writer(name) + f_w = ZipFile.addfile(zip, "ensemble.meta") + n = length(ensemble.C) + write(f_w, "$(n)\n") + write(f_w, join(ensemble.W, ' ')) + close(f_w) + function do_work(C::StructProbCircuit, i::Integer) + f_c = ZipFile.addfile(zip, "$(i).psdd") + save_as_psdd(f_c, C, C.vtree) + f_v = ZipFile.addfile(zip, "$(i).vtree") + save_vtree(f_v, C.vtree) + nothing + end + !quiet && print("Saving circuits...\n ") + for (i, C) ∈ enumerate(ensemble.C) + do_work(C, i) + !quiet && print('*') + end + !quiet && print('\n') + close(zip) + nothing +end diff --git a/src/ensembles/bmc.jl b/src/ensembles/bmc.jl index 02f815b5..1881b5fd 100644 --- a/src/ensembles/bmc.jl +++ b/src/ensembles/bmc.jl @@ -1,3 +1,5 @@ +export BayesModelComb, bmc_sample_psdd + using Distributions: Dirichlet "Bayesian Model Combination." @@ -46,7 +48,6 @@ function bmc_sample_psdd(n::Integer, ϕ::Diagram, k::Integer, D::DataFrame, q::I LL .= LL ./ sum(LL) return BayesModelComb(E, log.(LL)) end -export bmc_sample_psdd function weighted_query(B::BayesModelComb{T}, D::DataFrame, f::Function; kwargs...)::Vector{Float64} where T <: ProbCircuit n, m = nrow(D), length(B.E) diff --git a/src/ensembles/ensembles.jl b/src/ensembles/ensembles.jl index 4d3e14d8..5ffe446c 100644 --- a/src/ensembles/ensembles.jl +++ b/src/ensembles/ensembles.jl @@ -1,3 +1,5 @@ +export Ensemble, ensemble_sample_psdd, sample_vtree + using ThreadPools """Split `X` into two partitions `A` and `B`, where `A` is a Bernoulli sample of each element in @@ -25,7 +27,6 @@ function sample_vtree(n::Int, p::Float64)::Vtree end return p < 0 ? Vtree(n, :random) : passdown(shuffle!(collect(1:n))) end -export sample_vtree "Weighted ensemble of probabilistic circuits."
mutable struct Ensemble{T <: ProbCircuit} @@ -70,7 +71,6 @@ function ensemble_sample_psdd(n::Integer, ϕ::Diagram, k::Int, D::DataFrame; vtr @assert strategy == :uniform "Unrecognized ensemble strategy." return E end -export ensemble_sample_psdd "Learns the weights of the Ensemble by the likelihood value of data `D`." function learn_ensemble_llw!(E::Ensemble{T}, D::DataFrame)::Ensemble{T} where T <: ProbCircuit diff --git a/src/structurelearner/sample_psdd.jl b/src/structurelearner/sample_psdd.jl index 09c0c71c..6ce025b8 100644 --- a/src/structurelearner/sample_psdd.jl +++ b/src/structurelearner/sample_psdd.jl @@ -1,6 +1,8 @@ +export sample_psdd + using StatsFuns -using BinaryDecisionDiagrams: Diagram, BinaryDecisionDiagrams -const BDD = BinaryDecisionDiagrams +using BinaryDecisionDiagrams: Diagram +import BinaryDecisionDiagrams as BDD "Samples an element from a Binomial distribution with p=0.5." function sample_row(n::Int)::Int @@ -134,7 +136,6 @@ Samples a PSDD from a BDD `ϕ` and vtree `V` with at most `k` elements in each d !randomize_weights && estimate_parameters(C, D; pseudocount) return C end -export sample_psdd function sample_psdd_r(ϕ::Diagram, V::Vtree, k::Integer, leaves::Dict{Int32, StructProbLiteralNode}, randomize_weights::Bool, opts::SamplingOpts, fact_on_⊤::Bool, ⊤_k::Integer, p_mr::Real, diff --git a/test/ensembles/bmc_tests.jl b/test/ensembles/bmc_tests.jl index e4cd8e7d..7b70049b 100644 --- a/test/ensembles/bmc_tests.jl +++ b/test/ensembles/bmc_tests.jl @@ -1,13 +1,14 @@ using Test using ProbabilisticCircuits using DataFrames -using BinaryDecisionDiagrams +using BinaryDecisionDiagrams: Diagram +import BinaryDecisionDiagrams as BDD @testset "BMC tests with SamplePSDD" begin # Set up a logic constraint ϕ as a BDD and scope size n. function case(ϕ::Diagram, n::Integer; atol::Real = 0) # All possible valuations (including impossible ones). - M = all_valuations(collect(1:n)) + M = BDD.all_valuations(collect(1:n)) # Get only possible worlds. 
W = M[findall(ϕ.(eachrow(M))),:] # Assign random probabilities for each world in W. @@ -24,10 +25,10 @@ using BinaryDecisionDiagrams @test isapprox(evi[findall(>(0), evi)], (R/sum(R)); atol) end - case((1 ∧ 2) ∨ (3 ∧ ¬4) ∨ (¬1 ∧ 5), 5) - case((1 → 3) ∧ (5 → ¬2), 5) - case(and(1, 2, 3) ∨ and(4, 5), 5) - case(exactly(3, collect(1:5)), 5) - case(atleast(3, collect(1:5)), 5) - case(atmost(3, collect(1:5)), 5) + case(BDD.or(BDD.and(1, 2), BDD.and(3, BDD.:¬(4)), BDD.and(BDD.:¬(1), 5)), 5) + case(BDD.and(BDD.:→(1, 3), BDD.:→(5, BDD.:¬(2))), 5) + case(BDD.or(BDD.and(1, 2, 3), BDD.and(4, 5)), 5) + case(BDD.exactly(3, collect(1:5)), 5) + case(BDD.atleast(3, collect(1:5)), 5) + case(BDD.atmost(3, collect(1:5)), 5) end diff --git a/test/ensembles/ensembles_tests.jl b/test/ensembles/ensembles_tests.jl index 3f7e9730..cbbf5e7b 100644 --- a/test/ensembles/ensembles_tests.jl +++ b/test/ensembles/ensembles_tests.jl @@ -1,13 +1,14 @@ using Test using ProbabilisticCircuits using DataFrames -using BinaryDecisionDiagrams +using BinaryDecisionDiagrams: Diagram +import BinaryDecisionDiagrams as BDD @testset "ensemble tests with SamplePSDD" begin # Set up a logic constraint ϕ as a BDD and scope size n. Sample m PSDDs. - function case(ϕ::Diagram, n::Integer, strategy::Symbol; m::Integer = 20, atol::Real = 1e-2) + function case(ϕ::Diagram, n::Integer, strategy::Symbol; m::Integer = 20, atol::Real = 1e-2)::Ensemble{StructProbCircuit} # All possible valuations (including impossible ones). - M = all_valuations(collect(1:n)) + M = BDD.all_valuations(collect(1:n)) # Get only possible worlds. W = M[findall(ϕ.(eachrow(M))),:] # Assign random probabilities for each world in W. @@ -23,14 +24,31 @@ using BinaryDecisionDiagrams # Test probabilities. 
evi = exp.(EVI(E, T)) @test isapprox(evi[findall(>(0), evi)], (R/sum(R)); atol) + return E end + Es = Vector{Ensemble{StructProbCircuit}}() for strategy ∈ [:likelihood, :uniform, :em, :stacking] - case((1 ∧ 2) ∨ (3 ∧ ¬4) ∨ (¬1 ∧ 5), 5, strategy) - case((1 → 3) ∧ (5 → ¬2), 5, strategy) - case(and(1, 2, 3) ∨ and(4, 5), 5, strategy) - case(exactly(3, collect(1:5)), 5, strategy) - case(atleast(3, collect(1:5)), 5, strategy) - case(atmost(3, collect(1:5)), 5, strategy) + push!(Es, case(BDD.or(BDD.and(1, 2), BDD.and(3, BDD.:¬(4)), BDD.and(BDD.:¬(1), 5)), 5, strategy)) + push!(Es, case(BDD.and(BDD.:→(1, 3), BDD.:→(5, BDD.:¬(2))), 5, strategy)) + push!(Es, case(BDD.or(BDD.and(1, 2, 3), BDD.and(4, 5)), 5, strategy)) + push!(Es, case(BDD.exactly(3, collect(1:5)), 5, strategy)) + push!(Es, case(BDD.atleast(3, collect(1:5)), 5, strategy)) + push!(Es, case(BDD.atmost(3, collect(1:5)), 5, strategy)) + end + + tmp = mktempdir() + @testset "Saving and loading ensembles" begin + for (i, E) ∈ enumerate(Es) + @test_nowarn save_as_ensemble("$tmp/$i.esbl", E; quiet = true) + end + end + Rs = Vector{Ensemble{StructProbCircuit}}() + T = DataFrame(BDD.all_valuations(1:5)) + @testset "Loading ensembles" begin + for i ∈ 1:length(Es) + E = load_as_ensemble("$tmp/$i.esbl"; quiet = true) + @test EVI(E, T) ≈ EVI(Es[i], T) + end end end diff --git a/test/structurelearner/sample_psdd_tests.jl b/test/structurelearner/sample_psdd_tests.jl index e01abf6e..0e286aee 100644 --- a/test/structurelearner/sample_psdd_tests.jl +++ b/test/structurelearner/sample_psdd_tests.jl @@ -1,14 +1,15 @@ using Test using ProbabilisticCircuits using DataFrames -using BinaryDecisionDiagrams +using BinaryDecisionDiagrams: Diagram +import BinaryDecisionDiagrams as BDD import LogicCircuits: Vtree @testset "SamplePSDD tests" begin # Set up a logic constraint ϕ as a BDD and scope size n. Sample m PSDDs. 
function case(ϕ::Diagram, n::Integer; m::Integer = 20, atol::Real = 0) # All possible valuations (including impossible ones). - M = all_valuations(collect(1:n)) + M = BDD.all_valuations(collect(1:n)) # Get only possible worlds. W = M[findall(ϕ.(eachrow(M))),:] # Assign random probabilities for each world in W. @@ -30,10 +31,10 @@ import LogicCircuits: Vtree end end - case((1 ∧ 2) ∨ (3 ∧ ¬4) ∨ (¬1 ∧ 5), 5) - case((1 → 3) ∧ (5 → ¬2), 5) - case(and(1, 2, 3) ∨ and(4, 5), 5) - case(exactly(3, collect(1:5)), 5) - case(atleast(3, collect(1:5)), 5) - case(atmost(3, collect(1:5)), 5) + case(BDD.or(BDD.and(1, 2), BDD.and(3, BDD.:¬(4)), BDD.and(BDD.:¬(1), 5)), 5) + case(BDD.and(BDD.:→(1, 3), BDD.:→(5, BDD.:¬(2))), 5) + case(BDD.or(BDD.and(1, 2, 3), BDD.and(4, 5)), 5) + case(BDD.exactly(3, collect(1:5)), 5) + case(BDD.atleast(3, collect(1:5)), 5) + case(BDD.atmost(3, collect(1:5)), 5) end