Skip to content

Commit

Permalink
Add Ensemble I/O
Browse files Browse the repository at this point in the history
  • Loading branch information
RenatoGeh committed Jun 24, 2021
1 parent f169790 commit e6b04f8
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 31 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
ThreadPools = "b189fb0b-2eb5-4ed4-bc0c-d34c51242431"
TikzGraphs = "b4f28e30-c73f-5eaf-a395-8a9db949a742"
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"

[compat]
CUDA = "2, 3.0"
Expand Down
44 changes: 43 additions & 1 deletion src/LoadSave/circuit_loaders.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
export zoo_clt, zoo_clt_file, zoo_psdd, zoo_lc, load_prob_circuit,
load_struct_prob_circuit, load_logistic_circuit
load_struct_prob_circuit, load_logistic_circuit, load_as_ensemble

using LogicCircuits
using Pkg.Artifacts
Expand Down Expand Up @@ -96,3 +96,45 @@ function parse_clt(filename::String)::MetaDiGraph
return clt
end

"Loads an ensemble from disk."
function load_as_ensemble(name::String; quiet::Bool = false)::Ensemble{StructProbCircuit}
    # Reads the `.esbl` zip archive `name` and rebuilds the ensemble it stores.
    #
    # Expected archive contents:
    #   * one entry ending in `.meta`: first line is the number of circuits `n`, second
    #     line holds `n` whitespace-separated weights;
    #   * for every `k` in `1:n`, an entry `k.psdd` and an entry `k.vtree`.
    #
    # Keywords: `quiet` suppresses the progress output printed to standard output.
    # Returns: `Ensemble{StructProbCircuit}(C, W)` with the loaded circuits and weights.
    # Throws: `AssertionError` if `name` does not end in `.esbl`, if the meta entry is
    # missing or inconsistent, or if a circuit/vtree entry is missing or out of range.
    @assert endswith(name, ".esbl")
    zip = ZipFile.Reader(name)
    # Release the archive even when parsing or validation fails part-way through.
    try
        W, n = Vector{Float64}(), -1
        for f in zip.files
            if endswith(f.name, ".meta")
                n = parse(Int, readline(f))
                W = map(x -> parse(Float64, x), split(readline(f)))
            end
        end
        @assert n > 0 && length(W) == n "Ensemble file format corrupted, empty or missing meta file."
        # P[k] = (position of k.psdd, position of k.vtree) in zip.files; 0 marks "not found".
        P = Tuple{Int, Int}[(0, 0) for _ in 1:n]
        for (i, f) in enumerate(zip.files)
            if endswith(f.name, ".psdd")
                # Strip the 5-character ".psdd" suffix to recover the circuit number.
                j = parse(Int, f.name[1:end-5])
                @assert j > 0 && j <= n "Either .meta file is corrupted or .psdd is misnamed (faulty: $(f.name))."
                P[j] = (i, P[j][2])
            elseif endswith(f.name, ".vtree")
                # Strip the 6-character ".vtree" suffix to recover the circuit number.
                j = parse(Int, f.name[1:end-6])
                @assert j > 0 && j <= n "Either .meta file is corrupted or .vtree is misnamed (faulty: $(f.name))."
                P[j] = (P[j][1], i)
            end
        end
        C = Vector{StructProbCircuit}(undef, n)
        !quiet && print("Loading circuits...\n ")
        for (k, (i, j)) in enumerate(P)
            @assert i > 0 "Missing .psdd file for the $k-th circuit."
            @assert j > 0 "Missing .vtree file for the $k-th circuit."
            # Only the circuit (first returned value) is kept.
            psdd, _ = load_struct_prob_circuit(zip.files[i], zip.files[j])
            C[k] = psdd
            !quiet && print('*')
        end
        !quiet && print('\n')
        return Ensemble{StructProbCircuit}(C, W)
    finally
        close(zip)
    end
end
29 changes: 28 additions & 1 deletion src/LoadSave/circuit_savers.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
export save_circuit, save_as_dot, save_as_psdd, save_as_logistic
export save_circuit, save_as_dot, save_as_psdd, save_as_logistic, save_as_ensemble

using ZipFile
using LogicCircuits.LoadSave: SDDElement,
PSDDElement,
save_lines,
Expand Down Expand Up @@ -195,3 +196,29 @@ function save_as_dot(file::String, circuit::ProbCircuit)
flush(f)
close(f)
end

"Save file as a .esbl ensemble file format."
function save_as_ensemble(name::String, ensemble::Ensemble{StructProbCircuit}; quiet::Bool = false)
    # Writes `ensemble` to the `.esbl` zip archive `name`.
    #
    # Archive layout produced here:
    #   * `ensemble.meta`: the number of circuits on the first line, followed by the
    #     weights `ensemble.W` joined by single spaces;
    #   * `i.psdd` and `i.vtree` for the i-th circuit of `ensemble.C` (1-based).
    #
    # Keywords: `quiet` suppresses the progress output printed to standard output.
    # Returns: `nothing`.
    # Throws: `AssertionError` if `name` does not end in `.esbl`.
    @assert endswith(name, ".esbl")
    zip = ZipFile.Writer(name)
    # Close the writer even if serialising a circuit throws, so the handle is not leaked.
    try
        f_w = ZipFile.addfile(zip, "ensemble.meta")
        n = length(ensemble.C)
        write(f_w, "$(n)\n")
        write(f_w, join(ensemble.W, ' '))
        close(f_w)
        !quiet && print("Saving circuits...\n ")
        for (i, C) in enumerate(ensemble.C)
            f_c = ZipFile.addfile(zip, "$(i).psdd")
            save_as_psdd(f_c, C, C.vtree)
            f_v = ZipFile.addfile(zip, "$(i).vtree")
            save_vtree(f_v, C.vtree)
            !quiet && print('*')
        end
        !quiet && print('\n')
    finally
        close(zip)
    end
    nothing
end
3 changes: 2 additions & 1 deletion src/ensembles/bmc.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
export BayesModelComb, bmc_sample_psdd

using Distributions: Dirichlet

"Bayesian Model Combination."
Expand Down Expand Up @@ -46,7 +48,6 @@ function bmc_sample_psdd(n::Integer, ϕ::Diagram, k::Integer, D::DataFrame, q::I
LL .= LL ./ sum(LL)
return BayesModelComb(E, log.(LL))
end
export bmc_sample_psdd

function weighted_query(B::BayesModelComb{T}, D::DataFrame, f::Function; kwargs...)::Vector{Float64} where T <: ProbCircuit
n, m = nrow(D), length(B.E)
Expand Down
4 changes: 2 additions & 2 deletions src/ensembles/ensembles.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
export Ensemble, ensemble_sample_psdd, sample_vtree

using ThreadPools

"""Split `X` into two partitions `A` and `B`, where `A` is a Bernoulli sample of each element in
Expand Down Expand Up @@ -25,7 +27,6 @@ function sample_vtree(n::Int, p::Float64)::Vtree
end
return p < 0 ? Vtree(n, :random) : passdown(shuffle!(collect(1:n)))
end
export sample_vtree

"Weighted ensemble of probabilistic circuits."
mutable struct Ensemble{T <: ProbCircuit}
Expand Down Expand Up @@ -70,7 +71,6 @@ function ensemble_sample_psdd(n::Integer, ϕ::Diagram, k::Int, D::DataFrame; vtr
@assert strategy == :uniform "Unrecognized ensemble strategy."
return E
end
export ensemble_sample_psdd

"Learns the weights of the Ensemble by the likelihood value of data `D`."
function learn_ensemble_llw!(E::Ensemble{T}, D::DataFrame)::Ensemble{T} where T <: ProbCircuit
Expand Down
3 changes: 2 additions & 1 deletion src/structurelearner/sample_psdd.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
export sample_psdd

using StatsFuns
using BinaryDecisionDiagrams: Diagram, BinaryDecisionDiagrams
const BDD = BinaryDecisionDiagrams
Expand Down Expand Up @@ -134,7 +136,6 @@ Samples a PSDD from a BDD `ϕ` and vtree `V` with at most `k` elements in each d
!randomize_weights && estimate_parameters(C, D; pseudocount)
return C
end
export sample_psdd

function sample_psdd_r::Diagram, V::Vtree, k::Integer, leaves::Dict{Int32, StructProbLiteralNode},
randomize_weights::Bool, opts::SamplingOpts, fact_on_⊤::Bool, ⊤_k::Integer, p_mr::Real,
Expand Down
17 changes: 9 additions & 8 deletions test/ensembles/bmc_tests.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
using Test
using ProbabilisticCircuits
using DataFrames
using BinaryDecisionDiagrams
using BinaryDecisionDiagrams: Diagram, BinaryDecisionDiagrams
const BDD = BinaryDecisionDiagrams

@testset "BMC tests with SamplePSDD" begin
# Set up a logic constraint ϕ as a BDD and scope size n.
function case::Diagram, n::Integer; atol::Real = 0)
# All possible valuations (including impossible ones).
M = all_valuations(collect(1:n))
M = BDD.all_valuations(collect(1:n))
# Get only possible worlds.
W = M[findall(ϕ.(eachrow(M))),:]
# Assign random probabilities for each world in W.
Expand All @@ -24,10 +25,10 @@ using BinaryDecisionDiagrams
@test isapprox(evi[findall(>(0), evi)], (R/sum(R)); atol)
end

case((1 2) (3 ¬4) (¬1 5), 5)
case((1 3) (5 ¬2), 5)
case(and(1, 2, 3) and(4, 5), 5)
case(exactly(3, collect(1:5)), 5)
case(atleast(3, collect(1:5)), 5)
case(atmost(3, collect(1:5)), 5)
case(BDD.or(BDD.and(1, 2), BDD.and(3, BDD.:¬(4)), BDD.and(BDD.:¬(1), 5)), 5)
case(BDD.and(BDD.:(1, 3), BDD.:(5, BDD.:¬(2))), 5)
case(BDD.or(BDD.and(1, 2, 3), BDD.and(4, 5)), 5)
case(BDD.exactly(3, collect(1:5)), 5)
case(BDD.atleast(3, collect(1:5)), 5)
case(BDD.atmost(3, collect(1:5)), 5)
end
36 changes: 27 additions & 9 deletions test/ensembles/ensembles_tests.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
using Test
using ProbabilisticCircuits
using DataFrames
using BinaryDecisionDiagrams
using BinaryDecisionDiagrams: Diagram, BinaryDecisionDiagrams
const BDD = BinaryDecisionDiagrams

@testset "ensemble tests with SamplePSDD" begin
# Set up a logic constraint ϕ as a BDD and scope size n. Sample m PSDDs.
function case::Diagram, n::Integer, strategy::Symbol; m::Integer = 20, atol::Real = 1e-2)
function case::Diagram, n::Integer, strategy::Symbol; m::Integer = 20, atol::Real = 1e-2)::Ensemble{StructProbCircuit}
# All possible valuations (including impossible ones).
M = all_valuations(collect(1:n))
M = BDD.all_valuations(collect(1:n))
# Get only possible worlds.
W = M[findall(ϕ.(eachrow(M))),:]
# Assign random probabilities for each world in W.
Expand All @@ -23,14 +24,31 @@ using BinaryDecisionDiagrams
# Test probabilities.
evi = exp.(EVI(E, T))
@test isapprox(evi[findall(>(0), evi)], (R/sum(R)); atol)
return E
end

Es = Vector{Ensemble{StructProbCircuit}}()
for strategy [:likelihood, :uniform, :em, :stacking]
case((1 2) (3 ¬4) (¬1 5), 5, strategy)
case((1 3) (5 ¬2), 5, strategy)
case(and(1, 2, 3) and(4, 5), 5, strategy)
case(exactly(3, collect(1:5)), 5, strategy)
case(atleast(3, collect(1:5)), 5, strategy)
case(atmost(3, collect(1:5)), 5, strategy)
push!(Es, case(BDD.or(BDD.and(1, 2), BDD.and(3, BDD.:¬(4)), BDD.and(BDD.:¬(1), 5)), 5, strategy))
push!(Es, case(BDD.and(BDD.:(1, 3), BDD.:(5, BDD.:¬(2))), 5, strategy))
push!(Es, case(BDD.or(BDD.and(1, 2, 3), BDD.and(4, 5)), 5, strategy))
push!(Es, case(BDD.exactly(3, collect(1:5)), 5, strategy))
push!(Es, case(BDD.atleast(3, collect(1:5)), 5, strategy))
push!(Es, case(BDD.atmost(3, collect(1:5)), 5, strategy))
end

tmp = mktempdir()
# Write every sampled ensemble to its own archive inside the temporary directory.
@testset "Saving and loading ensembles" begin
    for (i, E) in enumerate(Es)
        @test_nowarn save_as_ensemble("$tmp/$i.esbl", E; quiet = true)
    end
end
# Valuations over the five variables used by the cases above.
T = DataFrame(BDD.all_valuations(1:5))
@testset "Loading ensembles" begin
    for i in eachindex(Es)
        E = load_as_ensemble("$tmp/$i.esbl"; quiet = true)
        # A reloaded ensemble must give the same evidence as the one that was saved.
        @test isapprox(EVI(E, T), EVI(Es[i], T))
    end
end
end
17 changes: 9 additions & 8 deletions test/structurelearner/sample_psdd_tests.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
using Test
using ProbabilisticCircuits
using DataFrames
using BinaryDecisionDiagrams
using BinaryDecisionDiagrams: Diagram, BinaryDecisionDiagrams
const BDD = BinaryDecisionDiagrams
import LogicCircuits: Vtree

@testset "SamplePSDD tests" begin
# Set up a logic constraint ϕ as a BDD and scope size n. Sample m PSDDs.
function case::Diagram, n::Integer; m::Integer = 20, atol::Real = 0)
# All possible valuations (including impossible ones).
M = all_valuations(collect(1:n))
M = BDD.all_valuations(collect(1:n))
# Get only possible worlds.
W = M[findall(ϕ.(eachrow(M))),:]
# Assign random probabilities for each world in W.
Expand All @@ -30,10 +31,10 @@ import LogicCircuits: Vtree
end
end

case((1 2) (3 ¬4) (¬1 5), 5)
case((1 3) (5 ¬2), 5)
case(and(1, 2, 3) and(4, 5), 5)
case(exactly(3, collect(1:5)), 5)
case(atleast(3, collect(1:5)), 5)
case(atmost(3, collect(1:5)), 5)
case(BDD.or(BDD.and(1, 2), BDD.and(3, BDD.:¬(4)), BDD.and(BDD.:¬(1), 5)), 5)
case(BDD.and(BDD.:(1, 3), BDD.:(5, BDD.:¬(2))), 5)
case(BDD.or(BDD.and(1, 2, 3), BDD.and(4, 5)), 5)
case(BDD.exactly(3, collect(1:5)), 5)
case(BDD.atleast(3, collect(1:5)), 5)
case(BDD.atmost(3, collect(1:5)), 5)
end

0 comments on commit e6b04f8

Please sign in to comment.