diff --git a/examples/binomial_mnist.jl b/examples/binomial_mnist.jl
index e125fbd1..cb5fca7f 100644
--- a/examples/binomial_mnist.jl
+++ b/examples/binomial_mnist.jl
@@ -22,7 +22,7 @@ function truncate(data::Matrix; bits)
 end
 
 function run(; batch_size = 512, num_epochs1 = 1, num_epochs2 = 1, num_epochs3 = 20, 
-             pseudocount = 0.1, latents = 32, param_inertia1 = 0.2, param_inertia2 = 0.9, param_inertia3 = 0.95)
+             pseudocount = 0.01, latents = 32, param_inertia1 = 0.2, param_inertia2 = 0.9, param_inertia3 = 0.95)
     train, test = mnist_cpu()
     train_gpu, test_gpu = mnist_gpu()
     # train_gpu = train_gpu[1:1024, :]
diff --git a/examples/cat_rat_mnist.jl b/examples/cat_rat_mnist.jl
index d530ae48..5e6ae152 100644
--- a/examples/cat_rat_mnist.jl
+++ b/examples/cat_rat_mnist.jl
@@ -4,7 +4,7 @@ using MLDatasets
 using CUDA
 using Images
 
-device!(collect(devices())[2])
+# device!(collect(devices())[2])  # disabled: no device is pinned by this script; re-enable to select the second listed CUDA device
 
 function mnist_cpu()
     train_int = transpose(reshape(MNIST.traintensor(UInt8), 28*28, :));
@@ -39,45 +39,48 @@ function generate_rat(train)
     RAT(num_features; num_nodes_region, num_nodes_leaf, rg_depth, rg_replicas, input_type, balance_childs_parents)
 end
 
-function run()
+function run(; batch_size = 256, num_epochs1 = 1, num_epochs2 = 1, num_epochs3 = 20, 
+    pseudocount = 0.01, param_inertia1 = 0.2, param_inertia2 = 0.9, param_inertia3 = 0.9)
+    # Train a RAT-SPN on MNIST: two mini-batch EM stages, then rounds of full-batch EM; returns (pc, bpc).
     train, test = mnist_cpu();
     train_gpu, test_gpu = mnist_gpu();
     trunc_train = truncate(train; bits = 5);
 
-    # println("Generating HCLT structure with $latents latents... ");
-    # @time pc = hclt(trunc_train[1:5000,:], latents; num_cats = 256, pseudocount = 0.1, input_type = CategoricalDist);
-    # init_parameters(pc; perturbation = 0.4);
-    print("Generating RAT SPN....")
+    @info "Generating RAT SPN...."
     @time pc = generate_rat(trunc_train);
     init_parameters(pc; perturbation = 0.4);
 
     println("Number of free parameters: $(num_parameters(pc))")
 
-    print("Moving circuit to GPU... ")
+    @info "Moving circuit to GPU... "
     CUDA.@time bpc = CuBitsProbCircuit(BitsProbCircuit(pc));
 
-    batch_size  = 2048
-    pseudocount = 0.01
+    @show length(bpc.nodes)  # print the node count of the GPU circuit
+
+    @info "EM"
     softness    = 0
-    epochs_1      = 5
-    epochs_2      = 5
-    epochs_3      = 10
-    @time mini_batch_em(bpc, train_gpu, epochs_1; batch_size, pseudocount, 
-    			 softness, param_inertia = 0.2, param_inertia_end = 0.9)
+    @time mini_batch_em(bpc, train_gpu, num_epochs1; batch_size, pseudocount, 
+    			 softness, param_inertia = param_inertia1, param_inertia_end = param_inertia2)  # stage 1: param_inertia = param_inertia1, param_inertia_end = param_inertia2
                      
-    @time mini_batch_em(bpc, train_gpu, epochs_2; batch_size, pseudocount, 
-    			 softness, param_inertia = 0.9, param_inertia_end = 0.95)
+    @time mini_batch_em(bpc, train_gpu, num_epochs2; batch_size, pseudocount, 
+    			 softness, param_inertia = param_inertia2, param_inertia_end = param_inertia3)  # stage 2: continues from param_inertia2 towards param_inertia3
     
-    @time full_batch_em(bpc, train_gpu, epochs_3; batch_size, pseudocount, softness)
+    for iter=1:num_epochs3  # NOTE(review): num_epochs3 counts outer rounds here, not epochs — confirm the name is intended
+        @info "Iter $iter"
+        @time full_batch_em(bpc, train_gpu, 5; batch_size, pseudocount, softness)  # third argument (formerly epochs_3) is hard-coded to 5 per round
+
+        ll3 = loglikelihood(bpc, test_gpu; batch_size)  # test-set log-likelihood, recomputed after every round
+        println("test LL: $(ll3)")
+        # NOTE(review): do_sample saves to samples/rat_samples_<iter>.png — confirm the samples/ directory exists before running
+        @time do_sample(bpc, iter)  # iter is forwarded so each round writes its own sample image
+    end
 
     print("update parameters")
     @time ProbabilisticCircuits.update_parameters(bpc);
-    print("Save to file")
-    @time write("rat_cat.jpc.gz", pc);
-    return circuit, bpc
+    return pc, bpc  # pc: circuit built by generate_rat; bpc: its CuBitsProbCircuit counterpart
 end
 
-function do_sample(bpc)
+function do_sample(bpc, iter=999)
     CUDA.@time sms = sample(bpc, 100, 28*28, [UInt32]);
 
     do_img(i) = begin
@@ -88,8 +91,20 @@ function do_sample(bpc)
 
     arr = [do_img(i) for i=1:size(sms, 1)]
     imgs = mosaicview(arr, fillvalue=1, ncol=10, npad=4)
-    save("samples.png", imgs) 
+    save("samples/rat_samples_$(iter).png", imgs) 
+end
+
+# MAP-complete `num_examples` training rows on `bpc` after masking their first `num_missing` columns; returns MAP's result.
+function try_map(pc, bpc; num_examples = 10, num_missing = 400)
+    @info "MAP"
+    train_gpu, _ = mnist_gpu();
+    data = Array{Union{Missing, UInt32}}(train_gpu[1:num_examples, :]);  # host copy whose element type admits `missing`
+    data[:, 1:num_missing] .= missing;  # hide the first `num_missing` columns of every selected row
+    data_gpu = cu(data);
+    # @time MAP(pc, data; batch_size=num_examples)  # disabled CPU-side call; the only place `pc` would be used
+    return MAP(bpc, data_gpu; batch_size = num_examples)
 end
 
-# circuit, bpc = run();
-#do_sample(bpc)
\ No newline at end of file
+pc, bpc = run(; batch_size = 128, num_epochs1 = 2, num_epochs2 = 2, num_epochs3 = 2);  # top-level call: training starts as soon as this file is included
+# do_sample(bpc)  # disabled: one-off sampling from the trained GPU circuit
+# try_map(pc, bpc)  # disabled: MAP completion demo on partially masked rows
\ No newline at end of file