From a1739e2ddeee330914ba0c5b7812016f42e03c3c Mon Sep 17 00:00:00 2001
From: Meihua D <mhdang0026@gmail.com>
Date: Mon, 11 Apr 2022 15:53:53 -0700
Subject: [PATCH 1/5] em: let callbacks stop EM early; LikelihoodsLog returns valid/test LL

---
 src/parameters/em.jl | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/parameters/em.jl b/src/parameters/em.jl
index 08cea350..094ab107 100644
--- a/src/parameters/em.jl
+++ b/src/parameters/em.jl
@@ -357,7 +357,10 @@ function full_batch_em(bpc::CuBitsProbCircuit, raw_data::CuArray, num_epochs;
             marginals, flows, node_aggr, edge_aggr, 
             mine, maxe, debug)
         push!(log_likelihoods, log_likelihood)
-        call(callbacks, epoch, log_likelihood)
+        done = call(callbacks, epoch, log_likelihood)
+        if !isnothing(done) && done[end] == true
+            break
+        end
     end
     
     cleanup_memory((data, raw_data), (flows, flows_mem), 
@@ -467,10 +470,13 @@ function mini_batch_em(bpc::CuBitsProbCircuit, raw_data::CuArray, num_epochs;
         end
         log_likelihood = sum(log_likelihoods_epoch) / batch_size / num_batches
         push!(log_likelihoods, log_likelihood)
-        call(callbacks, epoch, log_likelihood)
+        done = call(callbacks, epoch, log_likelihood)
 
         param_inertia += Δparam_inertia
         flow_memory += Δflow_memory
+        if !isnothing(done) && done[end] == true
+            break
+        end
     end
 
     cleanup_memory((data, raw_data), (flows, flows_mem), 
@@ -490,10 +496,11 @@ end
 
 function call(callbacks::CALLBACKList, epoch, log_likelihood)
     if callbacks.list[1].verbose
-        for x in callbacks.list
+        done = map(callbacks.list) do x
             call(x, epoch, log_likelihood)
         end
         println()
+        done
     end
 end
 
@@ -543,16 +550,20 @@ call(caller::FullBatchLog, epoch, log_likelihood) = begin
     caller.verbose && print("Full-batch EM epoch $epoch; train LL $log_likelihood")
 end
 call(caller::LikelihoodsLog, epoch, log_likelihood) = begin
+    valid_ll, test_ll = nothing, nothing
     if epoch % caller.iter == 0 && (!isnothing(caller.valid_x) || !isnothing(caller.test_x))
         if !isnothing(caller.valid_x)
-            print("; valid LL ", loglikelihood(caller.bpc, caller.valid_x; 
-                batch_size=caller.batch_size,mars_mem=caller.mars_mem))
+            valid_ll = loglikelihood(caller.bpc, caller.valid_x; 
+                batch_size=caller.batch_size,mars_mem=caller.mars_mem)
+            print("; valid LL ", valid_ll)
         end
         if !isnothing(caller.test_x)
-            print("; test LL ", loglikelihood(caller.bpc, caller.test_x; 
-                batch_size=caller.batch_size,mars_mem=caller.mars_mem))
+            test_ll = loglikelihood(caller.bpc, caller.test_x; 
+                batch_size=caller.batch_size,mars_mem=caller.mars_mem)
+            print("; test LL ", test_ll)
         end
     end
+    valid_ll, test_ll
 end
 
 cleanup(caller::CALLBACK) = nothing

From ab194f0d64a98415b7d6bd53152bf7041420a178 Mon Sep 17 00:00:00 2001
From: Meihua Dang <mhdang@ucla.edu>
Date: Mon, 2 May 2022 16:13:33 -0700
Subject: [PATCH 2/5] flow: add pruneflag to accumulate log(1 - edge_flow) bound in edge_aggr

---
 src/queries/flow.jl | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/queries/flow.jl b/src/queries/flow.jl
index b6e1c452..f6381e2c 100644
--- a/src/queries/flow.jl
+++ b/src/queries/flow.jl
@@ -6,7 +6,8 @@ using CUDA, Random
 
 function layer_down_kernel(flows, edge_aggr, edges, _mars,
             num_ex_threads::Int32, num_examples::Int32, 
-            layer_start::Int32, edge_work::Int32, layer_end::Int32)
+            layer_start::Int32, edge_work::Int32, layer_end::Int32,
+            pruneflag::Bool)
 
     mars = Base.Experimental.Const(_mars)
         
@@ -69,7 +70,14 @@ function layer_down_kernel(flows, edge_aggr, edges, _mars,
         # make sure this is run on all warp threads, regardless of `active`
         if !isnothing(edge_aggr)
             !active && (edge_flow = zero(Float32))
-            edge_flow_warp = CUDA.reduce_warp(+, edge_flow)
+            if pruneflag && active
+                bound_ef = log(one(Float32) - edge_flow)
+                bound_ef = isnan(bound_ef) ? zero(Float32) : bound_ef
+                edge_flow_warp = CUDA.reduce_warp(+, bound_ef)
+            else
+                edge_flow_warp = CUDA.reduce_warp(+, edge_flow)
+            end
+
             if warp_lane == 1
                 CUDA.@atomic edge_aggr[edge_id] += edge_flow_warp
             end
@@ -102,12 +110,12 @@ end
 
 function layer_down(flows, edge_aggr, bpc, mars, 
                     layer_start, layer_end, num_examples; 
-                    mine, maxe, debug=false)
+                    mine, maxe, debug=false, pruneflag=false)
     edges = bpc.edge_layers_down.vectors
     num_edges = layer_end-layer_start+1
     dummy_args = (flows, edge_aggr, edges, mars, 
                   Int32(32), Int32(num_examples), 
-                  Int32(1), Int32(1), Int32(2))
+                  Int32(1), Int32(1), Int32(2), pruneflag)
     kernel = @cuda name="layer_down" launch=false layer_down_kernel(dummy_args...) 
     config = launch_configuration(kernel.fun)
 
@@ -117,7 +125,7 @@ function layer_down(flows, edge_aggr, bpc, mars,
     
     args = (flows, edge_aggr, edges, mars, 
             Int32(num_example_threads), Int32(num_examples), 
-            Int32(layer_start), Int32(edge_work), Int32(layer_end))
+            Int32(layer_start), Int32(edge_work), Int32(layer_end), pruneflag)
     if debug
         println("Layer $layer_start:$layer_end")
         @show threads blocks num_example_threads edge_work, num_edges num_examples
@@ -128,7 +136,7 @@ function layer_down(flows, edge_aggr, bpc, mars,
     nothing
 end
 
-function flows_circuit(flows, edge_aggr, bpc, mars, num_examples; mine, maxe, debug=false)
+function flows_circuit(flows, edge_aggr, bpc, mars, num_examples; mine, maxe, debug=false, pruneflag=false)
     init_flows() = begin 
         flows .= zero(Float32)
         flows[:,end] .= one(Float32)
@@ -144,7 +152,7 @@ function flows_circuit(flows, edge_aggr, bpc, mars, num_examples; mine, maxe, de
     for layer_end in bpc.edge_layers_down.ends
         layer_down(flows, edge_aggr, bpc, mars, 
                    layer_start, layer_end, num_examples; 
-                   mine, maxe, debug)
+                   mine, maxe, debug, pruneflag)
         layer_start = layer_end + 1
     end
     nothing
@@ -207,9 +215,9 @@ end
 # Full downward pass
 ##################################################################################
 
-function probs_flows_circuit(flows, mars, edge_aggr, bpc, data, example_ids; mine, maxe, debug=false)
+function probs_flows_circuit(flows, mars, edge_aggr, bpc, data, example_ids; mine, maxe, debug=false, pruneflag=false)
     eval_circuit(mars, bpc, data, example_ids; mine, maxe, debug)
-    flows_circuit(flows, edge_aggr, bpc, mars, length(example_ids); mine, maxe, debug)
+    flows_circuit(flows, edge_aggr, bpc, mars, length(example_ids); mine, maxe, debug, pruneflag)
     input_flows_circuit(flows, bpc, data, example_ids; mine, maxe, debug)
     nothing
 end
\ No newline at end of file

From afd757239c307774b64ab3adf394cc8a1f5075a2 Mon Sep 17 00:00:00 2001
From: Meihua Dang <mhdang@ucla.edu>
Date: Tue, 3 May 2022 00:06:30 -0700
Subject: [PATCH 3/5] flow: replace pruneflag with Int32 flowflag; add second bound mode

---
 src/queries/flow.jl | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/src/queries/flow.jl b/src/queries/flow.jl
index f6381e2c..f478a2bc 100644
--- a/src/queries/flow.jl
+++ b/src/queries/flow.jl
@@ -6,8 +6,7 @@ using CUDA, Random
 
 function layer_down_kernel(flows, edge_aggr, edges, _mars,
             num_ex_threads::Int32, num_examples::Int32, 
-            layer_start::Int32, edge_work::Int32, layer_end::Int32,
-            pruneflag::Bool)
+            layer_start::Int32, edge_work::Int32, layer_end::Int32, flowflag::Int32)
 
     mars = Base.Experimental.Const(_mars)
         
@@ -70,10 +69,22 @@ function layer_down_kernel(flows, edge_aggr, edges, _mars,
         # make sure this is run on all warp threads, regardless of `active`
         if !isnothing(edge_aggr)
             !active && (edge_flow = zero(Float32))
-            if pruneflag && active
+            if active && issum && flowflag == 1
                 bound_ef = log(one(Float32) - edge_flow)
                 bound_ef = isnan(bound_ef) ? zero(Float32) : bound_ef
                 edge_flow_warp = CUDA.reduce_warp(+, bound_ef)
+            elseif active  && flowflag == 2
+                if issum
+                    bound_ef = (one(Float32) - exp(edge.logp)) / (one(Float32) - edge_flow)
+                    if bound_ef < 0
+                        bound_ef = zero(Float32)
+                    end
+                        bound_ef = log(bound_ef)
+                        bound_ef = isnan(bound_ef) ? zero(Float32) : bound_ef
+                    edge_flow_warp = CUDA.reduce_warp(+, bound_ef)
+                else
+                    edge_flow_warp = CUDA.reduce_warp(+, typemax(Float32))
+                end
             else
                 edge_flow_warp = CUDA.reduce_warp(+, edge_flow)
             end
@@ -110,12 +121,12 @@ end
 
 function layer_down(flows, edge_aggr, bpc, mars, 
                     layer_start, layer_end, num_examples; 
-                    mine, maxe, debug=false, pruneflag=false)
+                    mine, maxe, debug=false, flowflag=0)
     edges = bpc.edge_layers_down.vectors
     num_edges = layer_end-layer_start+1
     dummy_args = (flows, edge_aggr, edges, mars, 
                   Int32(32), Int32(num_examples), 
-                  Int32(1), Int32(1), Int32(2), pruneflag)
+                  Int32(1), Int32(1), Int32(2), Int32(flowflag))
     kernel = @cuda name="layer_down" launch=false layer_down_kernel(dummy_args...) 
     config = launch_configuration(kernel.fun)
 
@@ -125,7 +136,7 @@ function layer_down(flows, edge_aggr, bpc, mars,
     
     args = (flows, edge_aggr, edges, mars, 
             Int32(num_example_threads), Int32(num_examples), 
-            Int32(layer_start), Int32(edge_work), Int32(layer_end), pruneflag)
+            Int32(layer_start), Int32(edge_work), Int32(layer_end), Int32(flowflag))
     if debug
         println("Layer $layer_start:$layer_end")
         @show threads blocks num_example_threads edge_work, num_edges num_examples
@@ -136,7 +147,7 @@ function layer_down(flows, edge_aggr, bpc, mars,
     nothing
 end
 
-function flows_circuit(flows, edge_aggr, bpc, mars, num_examples; mine, maxe, debug=false, pruneflag=false)
+function flows_circuit(flows, edge_aggr, bpc, mars, num_examples; mine, maxe, debug=false, flowflag=0)
     init_flows() = begin 
         flows .= zero(Float32)
         flows[:,end] .= one(Float32)
@@ -152,7 +163,7 @@ function flows_circuit(flows, edge_aggr, bpc, mars, num_examples; mine, maxe, de
     for layer_end in bpc.edge_layers_down.ends
         layer_down(flows, edge_aggr, bpc, mars, 
                    layer_start, layer_end, num_examples; 
-                   mine, maxe, debug, pruneflag)
+                   mine, maxe, debug, flowflag)
         layer_start = layer_end + 1
     end
     nothing
@@ -215,9 +226,9 @@ end
 # Full downward pass
 ##################################################################################
 
-function probs_flows_circuit(flows, mars, edge_aggr, bpc, data, example_ids; mine, maxe, debug=false, pruneflag=false)
+function probs_flows_circuit(flows, mars, edge_aggr, bpc, data, example_ids; mine, maxe, debug=false, flowflag=0)
     eval_circuit(mars, bpc, data, example_ids; mine, maxe, debug)
-    flows_circuit(flows, edge_aggr, bpc, mars, length(example_ids); mine, maxe, debug, pruneflag)
+    flows_circuit(flows, edge_aggr, bpc, mars, length(example_ids); mine, maxe, debug, flowflag)
     input_flows_circuit(flows, bpc, data, example_ids; mine, maxe, debug)
     nothing
 end
\ No newline at end of file

From 06a3294a0aff873877f3bc31f1b006b5fb6c457c Mon Sep 17 00:00:00 2001
From: Meihua D <mhdang0026@gmail.com>
Date: Mon, 10 Oct 2022 14:40:11 -0700
Subject: [PATCH 4/5] data softening for binary dataset

---
 src/nodes/indicator_dist.jl | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/nodes/indicator_dist.jl b/src/nodes/indicator_dist.jl
index 3d3bee56..42ccd4fc 100644
--- a/src/nodes/indicator_dist.jl
+++ b/src/nodes/indicator_dist.jl
@@ -23,7 +23,11 @@ bits(d::Indicator, _ = nothing) = d
 unbits(d::Indicator, _ = nothing) = d
 
 loglikelihood(d::Indicator, value, _ = nothing) =
-    (d.value == value) ?  zero(Float32) : -Inf32
+    if value isa AbstractFloat && d isa Literal
+        (d.value) ? log(value) : log1p(-value)
+    else
+        (d.value == value) ?  zero(Float32) : -Inf32
+    end
 
 init_params(d::Indicator, _) = d
 

From 0c9fe8aa8a572256331c96d2c46067083a5f3092 Mon Sep 17 00:00:00 2001
From: Meihua D <mhdang0026@gmail.com>
Date: Mon, 10 Oct 2022 15:32:30 -0700
Subject: [PATCH 5/5] flow: revert the pruneflag/flowflag changes from patches 2-3

---
 src/queries/flow.jl | 37 +++++++++----------------------------
 1 file changed, 9 insertions(+), 28 deletions(-)

diff --git a/src/queries/flow.jl b/src/queries/flow.jl
index f478a2bc..b6e1c452 100644
--- a/src/queries/flow.jl
+++ b/src/queries/flow.jl
@@ -6,7 +6,7 @@ using CUDA, Random
 
 function layer_down_kernel(flows, edge_aggr, edges, _mars,
             num_ex_threads::Int32, num_examples::Int32, 
-            layer_start::Int32, edge_work::Int32, layer_end::Int32, flowflag::Int32)
+            layer_start::Int32, edge_work::Int32, layer_end::Int32)
 
     mars = Base.Experimental.Const(_mars)
         
@@ -69,26 +69,7 @@ function layer_down_kernel(flows, edge_aggr, edges, _mars,
         # make sure this is run on all warp threads, regardless of `active`
         if !isnothing(edge_aggr)
             !active && (edge_flow = zero(Float32))
-            if active && issum && flowflag == 1
-                bound_ef = log(one(Float32) - edge_flow)
-                bound_ef = isnan(bound_ef) ? zero(Float32) : bound_ef
-                edge_flow_warp = CUDA.reduce_warp(+, bound_ef)
-            elseif active  && flowflag == 2
-                if issum
-                    bound_ef = (one(Float32) - exp(edge.logp)) / (one(Float32) - edge_flow)
-                    if bound_ef < 0
-                        bound_ef = zero(Float32)
-                    end
-                        bound_ef = log(bound_ef)
-                        bound_ef = isnan(bound_ef) ? zero(Float32) : bound_ef
-                    edge_flow_warp = CUDA.reduce_warp(+, bound_ef)
-                else
-                    edge_flow_warp = CUDA.reduce_warp(+, typemax(Float32))
-                end
-            else
-                edge_flow_warp = CUDA.reduce_warp(+, edge_flow)
-            end
-
+            edge_flow_warp = CUDA.reduce_warp(+, edge_flow)
             if warp_lane == 1
                 CUDA.@atomic edge_aggr[edge_id] += edge_flow_warp
             end
@@ -121,12 +102,12 @@ end
 
 function layer_down(flows, edge_aggr, bpc, mars, 
                     layer_start, layer_end, num_examples; 
-                    mine, maxe, debug=false, flowflag=0)
+                    mine, maxe, debug=false)
     edges = bpc.edge_layers_down.vectors
     num_edges = layer_end-layer_start+1
     dummy_args = (flows, edge_aggr, edges, mars, 
                   Int32(32), Int32(num_examples), 
-                  Int32(1), Int32(1), Int32(2), Int32(flowflag))
+                  Int32(1), Int32(1), Int32(2))
     kernel = @cuda name="layer_down" launch=false layer_down_kernel(dummy_args...) 
     config = launch_configuration(kernel.fun)
 
@@ -136,7 +117,7 @@ function layer_down(flows, edge_aggr, bpc, mars,
     
     args = (flows, edge_aggr, edges, mars, 
             Int32(num_example_threads), Int32(num_examples), 
-            Int32(layer_start), Int32(edge_work), Int32(layer_end), Int32(flowflag))
+            Int32(layer_start), Int32(edge_work), Int32(layer_end))
     if debug
         println("Layer $layer_start:$layer_end")
         @show threads blocks num_example_threads edge_work, num_edges num_examples
@@ -147,7 +128,7 @@ function layer_down(flows, edge_aggr, bpc, mars,
     nothing
 end
 
-function flows_circuit(flows, edge_aggr, bpc, mars, num_examples; mine, maxe, debug=false, flowflag=0)
+function flows_circuit(flows, edge_aggr, bpc, mars, num_examples; mine, maxe, debug=false)
     init_flows() = begin 
         flows .= zero(Float32)
         flows[:,end] .= one(Float32)
@@ -163,7 +144,7 @@ function flows_circuit(flows, edge_aggr, bpc, mars, num_examples; mine, maxe, de
     for layer_end in bpc.edge_layers_down.ends
         layer_down(flows, edge_aggr, bpc, mars, 
                    layer_start, layer_end, num_examples; 
-                   mine, maxe, debug, flowflag)
+                   mine, maxe, debug)
         layer_start = layer_end + 1
     end
     nothing
@@ -226,9 +207,9 @@ end
 # Full downward pass
 ##################################################################################
 
-function probs_flows_circuit(flows, mars, edge_aggr, bpc, data, example_ids; mine, maxe, debug=false, flowflag=0)
+function probs_flows_circuit(flows, mars, edge_aggr, bpc, data, example_ids; mine, maxe, debug=false)
     eval_circuit(mars, bpc, data, example_ids; mine, maxe, debug)
-    flows_circuit(flows, edge_aggr, bpc, mars, length(example_ids); mine, maxe, debug, flowflag)
+    flows_circuit(flows, edge_aggr, bpc, mars, length(example_ids); mine, maxe, debug)
     input_flows_circuit(flows, bpc, data, example_ids; mine, maxe, debug)
     nothing
 end
\ No newline at end of file