Tractables · khosravipasha · Mar 2, 2021 · Jan 20, 2021 · Feb 28, 2021
diff --git a/src/mixtures/em.jl b/src/mixtures/em.jl
@@ -1,10 +1,10 @@
-export one_step_em, 
-    component_weights_per_example, 
-    initial_weights, 
+export one_step_em,
+    component_weights_per_example,
+    initial_weights,
     clustering,
-    log_likelihood_per_instance_per_component, 
-    estimate_parameters_cached, 
-    learn_circuit_mixture, 
+    log_likelihood_per_instance_per_component,
+    estimate_parameters_cached,
+    learn_circuit_mixture,
     learn_strudel
 
 using Statistics: mean
@@ -67,13 +67,13 @@ end
 
 function log_likelihood_per_instance_per_component(pc::SharedProbCircuit, data::DataFrame, values::Matrix{UInt64}, flows::Matrix{UInt64})
     @assert isbinarydata(data) "Can only calculate EVI on Bool data"
-    
+
     N = num_examples(data)
     num_mix = num_components(pc)
     log_likelihoods = zeros(Float64, N, num_mix)
     indices = init_array(Bool, N)::BitVector
-    
-    
+
+
     ll(n::SharedProbCircuit) = ()
     ll(n::SharedSumNode) = begin
         if num_children(n) != 1 # other nodes have no effect on likelihood
@@ -90,7 +90,7 @@ function log_likelihood_per_instance_per_component(pc::SharedProbCircuit, data::
     log_likelihoods
 end
 
-function estimate_parameters_cached(pc::SharedProbCircuit, example_weights::Matrix{Float64}, 
+function estimate_parameters_cached(pc::SharedProbCircuit, example_weights::Matrix{Float64},
         values::Matrix{UInt64}, flows::Matrix{UInt64}; pseudocount::Float64)
     N = size(example_weights, 1)
     foreach(pc) do pn
@@ -119,12 +119,14 @@ See "Strudel: Learning Structured-Decomposable Probabilistic Circuits. [arxiv.or
 """
 function learn_strudel(train_x; num_mix = 5,
     pseudocount=1.0,
-    init_maxiter = 10, 
-    em_maxiter = 20)
+    init_maxiter = 10,
+    em_maxiter = 20,
+    verbose = true)
 
-    pc = learn_circuit(train_x, maxiter=init_maxiter)
-    learn_circuit_mixture(pc, train_x; num_mix = num_mix, pseudocount= pseudocount, em_maxiter=em_maxiter)
-end  
+    # Learn one circuit first, then learn the mixture from it; `verbose` reaches both stages.
+    init_pc = learn_circuit(train_x; maxiter = init_maxiter, verbose)
+    return learn_circuit_mixture(init_pc, train_x; num_mix, pseudocount, em_maxiter, verbose)
+end
 
 
 """
@@ -135,7 +137,8 @@ Given a circuit, learns a mixture of structure decomposable circuits based on th
 function learn_circuit_mixture(pc, data;
         num_mix=5,
         pseudocount=1.0,
-        em_maxiter=20)
+        em_maxiter=20,
+        verbose = true)
 
     spc = compile(SharedProbCircuit, pc, num_mix)
     values, flows = satisfies_flows(spc, data)
@@ -146,7 +149,7 @@ function learn_circuit_mixture(pc, data;
     for iter in 1 : em_maxiter
         @assert isapprox(sum(component_weights), 1.0; atol=1e-10)
         lls, component_weights = one_step_em(spc, data, values, flows, component_weights; pseudocount=pseudocount)
-        println("EM Iteration $iter/$em_maxiter. Log likelihood $(mean(lls))")
+        verbose && println("EM Iteration $iter/$em_maxiter. Log likelihood $(mean(lls))")
     end
     spc, component_weights, lls
 end

diff --git a/src/structurelearner/heuristics.jl b/src/structurelearner/heuristics.jl
@@ -46,8 +46,9 @@ function heuristic_loss(circuit::LogicCircuit, train_x; pick_edge="eFlow", pick_
     else
         weights = nothing
     end
-    
+
     candidates, variable_scope = split_candidates(circuit)
+    if isempty(candidates) return nothing end
     values, flows = satisfies_flows(circuit, train_x; weights = nothing) # Do not use samples weights here
     if pick_edge == "eFlow"
         edge, flow = eFlow(values, flows, candidates)

diff --git a/src/structurelearner/learner.jl b/src/structurelearner/learner.jl
@@ -12,13 +12,14 @@ function learn_circuit(train_x;
         sanity_check=true,
         maxiter=100,
         seed=nothing,
-        return_vtree=false)
+        return_vtree=false,
+        verbose=true)
 
     # Initial Structure
     pc, vtree = learn_chow_liu_tree_circuit(train_x)
 
     learn_circuit(train_x, pc, vtree; pick_edge, pick_var, depth, pseudocount, sanity_check, 
-                  maxiter, seed, return_vtree, entropy_reg)
+                  maxiter, seed, return_vtree, entropy_reg, verbose)
 end
 function learn_circuit(train_x, pc, vtree;
         pick_edge="eFlow", pick_var="vMI", depth=1, 
@@ -30,7 +31,8 @@ function learn_circuit(train_x, pc, vtree;
         batch_size=0,
         splitting_data=nothing,
         use_gpu=false,
-        entropy_reg=0.0)
+        entropy_reg=0.0,
+        verbose=true)
 
     if seed !== nothing
         Random.seed!(seed)
@@ -41,7 +43,9 @@ function learn_circuit(train_x, pc, vtree;
                                    pick_edge=pick_edge, pick_var=pick_var)
 
     pc_split_step(circuit) = begin
-        c::ProbCircuit, = split_step(circuit; loss=loss, depth=depth, sanity_check=sanity_check)
+        r = split_step(circuit; loss=loss, depth=depth, sanity_check=sanity_check)
+        if isnothing(r) return nothing end
+        c, = r
         if batch_size > 0
             estimate_parameters(c, batch(train_x, batch_size); pseudocount, use_gpu, entropy_reg)
         else
@@ -57,14 +61,14 @@ function learn_circuit(train_x, pc, vtree;
         else
             ll = log_likelihood_avg(circuit, train_x; use_gpu)
         end
-        println("Iteration $iter/$maxiter. LogLikelihood = $(ll); nodes = $(num_nodes(circuit)); edges =  $(num_edges(circuit)); params = $(num_parameters(circuit))")
+        verbose && println("Iteration $iter/$maxiter. LogLikelihood = $(ll); nodes = $(num_nodes(circuit)); edges =  $(num_edges(circuit)); params = $(num_parameters(circuit))")
         iter += 1
         false
     end
     log_per_iter(pc)
     pc = struct_learn(pc; 
         primitives=[pc_split_step], kwargs=Dict(pc_split_step=>()), 
-        maxiter=maxiter, stop=log_per_iter)
+        maxiter=maxiter, stop=log_per_iter, verbose=verbose)
 
     if return_vtree
         pc, vtree

diff --git a/test/queries/likelihood_tests.jl b/test/queries/likelihood_tests.jl
@@ -46,8 +46,8 @@ include("../helper/gpu.jl")
 
     # Test Sturdel EVI
     samples, _ = sample(prob_circuit, 100000)
-    mix, weights, _ = @suppress_out learn_strudel(DataFrame(convert(BitArray, samples)); num_mix = 10,
-                                    init_maxiter = 20, em_maxiter = 100)
+    mix, weights, _ = learn_strudel(DataFrame(convert(BitArray, samples)); num_mix = 10,
+                                    init_maxiter = 20, em_maxiter = 100, verbose = false)
     mix_calc_prob = exp.(EVI(mix, data, weights))
 
     @test true_prob ≈ mix_calc_prob atol = 0.1

diff --git a/test/queries/marginal_flow_tests.jl b/test/queries/marginal_flow_tests.jl
@@ -65,8 +65,8 @@ include("../helper/gpu.jl")
 
     # Strudel Marginal Flow Test
     samples, _ = sample(prob_circuit, 100000)
-    mix, weights, _ = @suppress_out learn_strudel(DataFrame(convert(BitArray, samples)); 
-                                        num_mix = 10, init_maxiter = 20, em_maxiter = 100)
+    mix, weights, _ = learn_strudel(DataFrame(convert(BitArray, samples)); num_mix = 10,
+                                    init_maxiter = 20, em_maxiter = 100, verbose = false)
     mix_calc_prob = exp.(MAR(mix, data_marg, weights))
     @test true_prob ≈ mix_calc_prob atol = 0.1
     test_complete_mar(mix, data_full, weights, 0.1)

diff --git a/test/structurelearner/learner_tests.jl b/test/structurelearner/learner_tests.jl
@@ -46,4 +46,8 @@ using Suppressor
     @test num_parameters(pc3) == 60
     @test num_nodes(pc3) == 88
     @test log_likelihood_avg(pc3, data) ≈ -3.0466585640216746 atol=1e-6
-end
+
+    # Test when there are more iterations than candidates.
+    data = DataFrame(convert(BitArray, rand(Bool, 100, 4)))
+    @test_nowarn pc = learn_circuit(data; maxiter = 100, verbose = false)
+end