Skip to content

Commit

Permalink
Move general purpose functions to Utils
Browse files Browse the repository at this point in the history
  • Loading branch information
RenatoGeh committed Jul 6, 2021
1 parent 505b4a0 commit e90178a
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 54 deletions.
1 change: 1 addition & 0 deletions src/Utils/Utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ module Utils

include("misc.jl")
include("information.jl")
include("sample.jl")

end #module
33 changes: 32 additions & 1 deletion src/Utils/misc.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
export to_long_mi, logsumexp_cuda,
pop_cuda!, push_cuda!, all_empty, length_cuda,
generate_all, generate_data_all
generate_all, generate_data_all, kfold

using DataFrames
using CUDA: CUDA
Expand Down Expand Up @@ -123,3 +123,34 @@ function generate_data_all(N::Int)
end
DataFrame(data_all)
end

#####################
# K-fold partitioning
#####################

"Returns a(n index) partitioning a la k-fold."
function kfold(n::Int, p::Int)::Vector{Tuple{UnitRange, Vector{Int}}}
    # Splits the indices 1:n into `p` contiguous folds. Entry `j` of the result is a tuple
    # `(fold, rest)`: `fold` is the index range of fold `j` and `rest` holds every other
    # index of 1:n in increasing order.
    # The first `n % p` folds hold `n ÷ p + 1` indices and the remaining ones `n ÷ p`, so
    # fold sizes differ by at most one; when `p > n` the trailing folds are empty ranges.
    # Throws an `ArgumentError` if `n < 0` or `p < 1`.
    n >= 0 || throw(ArgumentError("n must be non-negative, got $n"))
    p >= 1 || throw(ArgumentError("p must be positive, got $p"))
    base, extra = divrem(n, p)
    folds = Vector{Tuple{UnitRange, Vector{Int}}}(undef, p)
    start = 1
    for j in 1:p
        # The first `extra` folds absorb the remainder, one additional index each.
        stop = start + base + (j <= extra ? 1 : 0) - 1
        # Either side of the fold may be an empty range; `vcat` then simply skips it.
        folds[j] = (start:stop, vcat(1:start-1, stop+1:n))
        start = stop + 1
    end
    return folds
end
34 changes: 34 additions & 0 deletions src/Utils/sample.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
export sample_vtree

using Random
using LogicCircuits: Vtree, PlainVtreeLeafNode

#####################
# Sampling functions
#####################

"""Split `X` into two partitions `A` and `B`, where `A` is a Bernoulli sample of each element in
`X` with probability `p` and `B=X∖A`. Guarantees at least one element in `A` and `B`."""
function bernoulli_partition(X::Vector{Int}, p::Float64)::Tuple{Vector{Int}, Vector{Int}}
    # Returns `(A, B)`: two non-empty vectors that together contain every element of `X`
    # exactly once. One randomly chosen element seeds `A`, a different one seeds `B`, and
    # each remaining element goes to `A` when `rand() <= p` and to `B` otherwise.
    # Throws an `ArgumentError` when `X` has fewer than two elements, since two non-empty
    # parts cannot be formed.
    n = length(X)
    n >= 2 || throw(ArgumentError("X must have at least two elements, got $n"))
    a = rand(1:n)
    # A cyclic shift of `a` by 1..n-1 positions can never land back on `a`, so `b != a`.
    b = ((rand(0:n-2) + a) % n) + 1
    A, B = Int[X[a]], Int[X[b]]
    for (i, x) in enumerate(X)
        # The two seed elements have already been placed.
        (i == a || i == b) && continue
        push!(rand() > p ? B : A, x)
    end
    return A, B
end

"Samples a Vtree with a right bias of `p`. If `p<0`, then uniformly sample vtrees."
function sample_vtree(n::Int, p::Float64)::Vtree
    # A negative `p` delegates to `Vtree(n, :random)`. Otherwise the variables 1:n are
    # shuffled and split recursively with `bernoulli_partition`; the Bernoulli-sampled part
    # of each split becomes the right child and the remainder the left child.
    if p < 0
        return Vtree(n, :random)
    end
    # A single variable becomes a leaf node.
    build(v::Int)::Vtree = PlainVtreeLeafNode(v)
    # Several variables are split in two and each side is built recursively.
    function build(vars::Vector{Int})::Vtree
        right, left = bernoulli_partition(vars, p)
        # Build the left subtree first, then the right one.
        left_child = build(length(left) == 1 ? left[1] : left)
        right_child = build(length(right) == 1 ? right[1] : right)
        return Vtree(left_child, right_child)
    end
    return build(shuffle!(collect(1:n)))
end
1 change: 1 addition & 0 deletions src/ensembles/bmc.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
export BayesModelComb, bmc_sample_psdd

using Distributions: Dirichlet
using ..Utils: sample_vtree

"Bayesian Model Combination."
mutable struct BayesModelComb{T <: ProbCircuit}
Expand Down
54 changes: 1 addition & 53 deletions src/ensembles/ensembles.jl
Original file line number Diff line number Diff line change
@@ -1,33 +1,8 @@
export Ensemble, ensemble_sample_psdd, sample_vtree

using ..Utils: sample_vtree, kfold
using ThreadPools

"""Split `X` into two partitions `A` and `B`, where `A` is a Bernoulli sample of each element in
`X` with probability `p` and `B=X∖A`. Guarantees at least one element in `A` and `B`."""
function bernoulli_partition(X::Vector{Int}, p::Float64)::Tuple{Vector{Int}, Vector{Int}}
    # Returns `(A, B)`: two non-empty vectors that together contain every element of `X`
    # exactly once. One randomly chosen element seeds `A`, a different one seeds `B`, and
    # each remaining element goes to `A` when `rand() <= p` and to `B` otherwise.
    # Throws an `ArgumentError` when `X` has fewer than two elements, since two non-empty
    # parts cannot be formed.
    n = length(X)
    n >= 2 || throw(ArgumentError("X must have at least two elements, got $n"))
    a = rand(1:n)
    # A cyclic shift of `a` by 1..n-1 positions can never land back on `a`, so `b != a`.
    b = ((rand(0:n-2) + a) % n) + 1
    A, B = Int[X[a]], Int[X[b]]
    for (i, x) in enumerate(X)
        # The two seed elements have already been placed.
        (i == a || i == b) && continue
        push!(rand() > p ? B : A, x)
    end
    return A, B
end

"Samples a Vtree with a right bias of `p`. If `p<0`, then uniformly sample vtrees."
function sample_vtree(n::Int, p::Float64)::Vtree
    # A negative `p` delegates to `Vtree(n, :random)`. Otherwise the variables 1:n are
    # shuffled and split recursively with `bernoulli_partition`; the Bernoulli-sampled part
    # of each split becomes the right child and the remainder the left child.
    if p < 0
        return Vtree(n, :random)
    end
    # A single variable becomes a leaf node.
    build(v::Int)::Vtree = PlainVtreeLeafNode(v)
    # Several variables are split in two and each side is built recursively.
    function build(vars::Vector{Int})::Vtree
        right, left = bernoulli_partition(vars, p)
        # Build the left subtree first, then the right one.
        left_child = build(length(left) == 1 ? left[1] : left)
        right_child = build(length(right) == 1 ? right[1] : right)
        return Vtree(left_child, right_child)
    end
    return build(shuffle!(collect(1:n)))
end

"Weighted ensemble of probabilistic circuits."
mutable struct Ensemble{T <: ProbCircuit}
C::Vector{T}
Expand Down Expand Up @@ -110,33 +85,6 @@ function learn_ensemble_em!(E::Ensemble{T}, D::DataFrame; maxiter::Integer = 100
return E
end

"Returns a(n index) partitioning a la k-fold."
function kfold(n::Int, p::Int)::Vector{Tuple{UnitRange, Vector{Int}}}
    # Splits the indices 1:n into `p` contiguous folds. Entry `j` of the result is a tuple
    # `(fold, rest)`: `fold` is the index range of fold `j` and `rest` holds every other
    # index of 1:n in increasing order.
    # The first `n % p` folds hold `n ÷ p + 1` indices and the remaining ones `n ÷ p`, so
    # fold sizes differ by at most one; when `p > n` the trailing folds are empty ranges.
    # Throws an `ArgumentError` if `n < 0` or `p < 1`.
    n >= 0 || throw(ArgumentError("n must be non-negative, got $n"))
    p >= 1 || throw(ArgumentError("p must be positive, got $p"))
    base, extra = divrem(n, p)
    folds = Vector{Tuple{UnitRange, Vector{Int}}}(undef, p)
    start = 1
    for j in 1:p
        # The first `extra` folds absorb the remainder, one additional index each.
        stop = start + base + (j <= extra ? 1 : 0) - 1
        # Either side of the fold may be an empty range; `vcat` then simply skips it.
        folds[j] = (start:stop, vcat(1:start-1, stop+1:n))
        start = stop + 1
    end
    return folds
end

"Learns the weights of the Ensemble by Stacking, with `k` as the number of folds in k-fold."
function learn_ensemble_stacking!(E::Ensemble{T}, D::DataFrame; maxiter::Integer = 100,
k::Integer = min(nrow(D), 5), pseudocount::Real = 1.0,
Expand Down

0 comments on commit e90178a

Please sign in to comment.