Skip to content

Commit

Permalink
Merge pull request #22 from Evovest/turbo
Browse files Browse the repository at this point in the history
turbo refactor
  • Loading branch information
jeremiedb authored Jul 14, 2019
2 parents 3f7c377 + 89b4080 commit bd33288
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 10 deletions.
13 changes: 11 additions & 2 deletions src/find_split.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,21 @@ function binarize(X, edges)
X_bin
end

function find_bags(x::Vector{T}, edges::Vector{T}) where T<:Real
"""
    find_bags(x_bin::Vector{T}) where T <: Real

Group the positions of `x_bin` by bin number.

Return a `Vector{BitSet}` with one entry per bin from `1` up to `maximum(x_bin)`.
Entry `b` holds every index `i` for which `x_bin[i] == b`. Bins that no element
maps to are left as empty sets, and values that are not whole numbers `>= 1` are
not placed in any bag. An empty `x_bin` yields an empty vector.
"""
function find_bags(x_bin::Vector{T}) where T <: Real
    # `maximum` throws on an empty collection, so handle that case up front.
    isempty(x_bin) && return BitSet[]
    nbags = max(0, floor(Int, maximum(x_bin)))
    bags = [BitSet() for _ in 1:nbags]
    # Single pass over the data instead of one full `x_bin .== bag` scan per bin.
    for (i, bin) in enumerate(x_bin)
        if bin >= 1 && isinteger(bin)
            push!(bags[Int(bin)], i)
        end
    end
    return bags
end

function find_bags_1(x::Vector{T}, edges::Vector{T}) where T<:Real
idx = BitSet(1:length(x) |> collect)
bags = [BitSet() for _ in 1:length(edges)]
for i in idx
bin = 1
while x[i] > edges[bin]
while x[i] >= edges[bin]
bin +=1
end
union!(bags[bin], i)
Expand Down
2 changes: 1 addition & 1 deletion src/predict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ function predict!(pred, tree::Tree, X::AbstractArray{T, 2}) where T<:Real
id = 1
x = view(X, i, :)
while tree.nodes[id].split
if x[tree.nodes[id].feat] <= tree.nodes[id].cond
if x[tree.nodes[id].feat] < tree.nodes[id].cond
id = tree.nodes[id].left
else
id = tree.nodes[id].right
Expand Down
4 changes: 2 additions & 2 deletions src/tree_vector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ function grow_gbtree(X::AbstractArray{R, 2}, Y::AbstractArray{T, 1}, params::Evo
edges = get_edges(X, params.nbins)
bags = Vector{Vector{BitSet}}(undef, size(𝑗_, 1))
@threads for feat in 1:size(𝑗_, 1)
bags[feat] = find_bags(X[:,feat], edges[feat])
bags[feat] = find_bags(X_bin[:,feat])
end

# initialize train nodes
Expand Down Expand Up @@ -196,7 +196,7 @@ function grow_gbtree!(model::GBTree, X::AbstractArray{R, 2}, Y::AbstractArray{T,
edges = get_edges(X, params.nbins)
bags = Vector{Vector{BitSet}}(undef, size(𝑗_, 1))
@threads for feat in 1:size(𝑗_, 1)
bags[feat] = find_bags(X[:,feat], edges[feat])
bags[feat] = find_bags(X_bin[:,feat])
end

# initialize train nodes
Expand Down
17 changes: 12 additions & 5 deletions test/datacup.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ names(data)

features = data[1:53]
X = convert(Array, features)
X = X + randn(size(X)) * 0.0001
# X = X + randn(size(X)) * 0.0001
Y = data[54]
Y = convert(Array{Float64}, Y)
𝑖 = collect(1:size(X,1))
Expand All @@ -32,12 +32,12 @@ Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]

params1 = EvoTreeRegressor(
loss=:logistic, metric=:logloss,
nrounds=10, nbins=16,
nrounds=100, nbins=16,
λ = 0.0, γ=0.0, η=0.1,
max_depth = 6, min_weight = 1.0,
rowsample=0.5, colsample=0.5)
rowsample=0.5, colsample=0.5, seed = 127)

@time model = grow_gbtree(X_train, Y_train, params1, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 1)
@time model = grow_gbtree(X_train, Y_train, params1, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 10)
@time model = grow_gbtree(X_train, Y_train, params1, print_every_n = 1)
@time pred_train_linear = EvoTrees.predict(model, X_train)

Expand Down Expand Up @@ -70,11 +70,18 @@ end
@time edges = get_edges(X, params1.nbins)
@time X_bin = binarize(X, edges)

# manual check
x1 = edges[2]
x2 = [0, x1[1], 0.1, x1[2], 0.5, x1[3], 0.95, x1[4]]
x2_bin = searchsortedlast.(Ref(edges[2][1:end-1]), x2) .+ 1
x2_bag = find_bags(x2_bin)

# Build the per-feature bags for `X`: compute bin edges with `get_edges`, binarize
# `X` against them, then fill one `Vector{BitSet}` slot per feature and return the
# resulting vector.
# NOTE(review): the loop is sized by `𝑗`, which is not an argument and is not defined
# in the visible lines — presumably a script-level vector of feature indices; confirm.
function prep(X, params)
edges = get_edges(X, params.nbins)
X_bin = binarize(X, edges)
bags = Vector{Vector{BitSet}}(undef, size(𝑗, 1))
for feat in 1:size(𝑗, 1)
# NOTE(review): the two assignments below target the same slot, so the second one
# overwrites the first. The two-argument call appears to be the pre-change line of
# this diff hunk and the one-argument call its replacement — confirm.
bags[feat] = find_bags(X[:,feat], edges[feat])
bags[feat] = find_bags(X_bin[:,feat])
end
return bags
end
Expand Down

0 comments on commit bd33288

Please sign in to comment.