-
Notifications
You must be signed in to change notification settings - Fork 6
/
README.jl
109 lines (60 loc) · 2.19 KB
/
README.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
using JLBoost, RDatasets

# Load iris and add a boolean target column marking the "setosa" rows.
iris = dataset("datasets", "iris");
iris.is_setosa = iris.Species .== "setosa";
target = :is_setosa;

# Column names other than the species label and the derived target.
features = filter(!in(("Species", "is_setosa")), names(iris));

# fit one tree
# ?jlboost for more details
xgtreemodel = jlboost(iris, target)

# Inspect the types of the fitted model's trees, loss and target.
typeof(trees(xgtreemodel))
typeof(xgtreemodel.loss)
typeof(xgtreemodel.target)

# Refit with `nrounds = 2`: once constrained through `max_depth`, once through
# `max_leaves` (with `max_depth = 0`).
xgtreemodel2 = jlboost(iris, target; nrounds = 2, max_depth = 2)
xgtreemodel3 = jlboost(iris, target; nrounds = 2, max_leaves = 8, max_depth = 0)

# Score through the explicit `JLBoost.predict` entry point; the third call scores
# the `vcat` of the first two models.
iris[!, :pred1] = JLBoost.predict(xgtreemodel, iris);
iris[!, :pred2] = JLBoost.predict(xgtreemodel2, iris);
iris[!, :pred1_plus_2] = JLBoost.predict(vcat(xgtreemodel, xgtreemodel2), iris)
first(iris[!, :pred1_plus_2], 8)

# Score again by calling the models (and a vector of models) directly on the table.
model_pair = [xgtreemodel, xgtreemodel2];
iris[!, :pred1] = xgtreemodel(iris);
iris[!, :pred2] = xgtreemodel2(iris);
iris[!, :pred1_plus_2] = model_pair(iris)
first(iris[!, :pred1_plus_2], 8)

# Metrics on the negated first-model scores, then feature importance for the
# second model.
AUC(-iris[!, :pred1], iris[!, :is_setosa])
gini(-iris[!, :pred1], iris[!, :is_setosa])
feature_importance(xgtreemodel2, iris)
using DataFrames
using JLBoost
using LossFunctions: L2DistLoss;

# Synthetic regression data: `x` is uniform on [0, 100) and `y` is `2x` plus
# uniform noise on [0, 1).
df = DataFrame(x = rand(100) * 100);
df[!, :y] = 2 * df.x .+ rand(nrow(df));

# Table operations illustrated by this example. They need `df`, `rows` and `cols`
# to exist, so they come after `df` is built; the selections below are
# illustrative values only.
rows = 1:10;
cols = [:x, :y];
nrow(df) # returns the number of rows
ncol(df) # returns the number of columns
view(df, rows, cols)

# Fit with an explicit feature list, a zero warm start (one entry per row) and
# an L2 distance loss.
target = :y;
features = [:x];
warm_start = fill(0.0, nrow(df));
loss = L2DistLoss();
jlboost(df, target, features, warm_start, loss; max_depth = 2) # default max_depth = 6
# Save the fitted model and its trees with `JLBoost.save`, then read both files
# back with `JLBoost.load`. The `let` keeps the two path names out of global scope.
let model_path = "model.jlb", tree_path = "model_tree.jlb"
    JLBoost.save(xgtreemodel, model_path)
    JLBoost.save(trees(xgtreemodel), tree_path)
    JLBoost.load(model_path)
    JLBoost.load(tree_path)
end;
using JLBoost, RDatasets, JDF

# Rebuild the iris table with its boolean target and the Symbol feature list.
iris = dataset("datasets", "iris");
iris[!, :is_setosa] = iris[!, :Species] .== "setosa";
target = :is_setosa;
features = setdiff(Symbol.(names(iris)), [:Species, :is_setosa]);

# Save the table as "iris.jdf" and open a `JDFFile` handle on it.
savejdf("iris.jdf", iris);
irisdisk = JDFFile("iris.jdf");

# fit using on disk JDF format
xgtree1 = jlboost(irisdisk, target, features);
xgtree2 = jlboost(iris, target, features; nrounds = 2, max_depth = 2);

# predict using on disk JDF format
# Each prediction is computed once and reused below instead of being re-run for
# every metric. `predict` is qualified to match the other uses in this file.
pred1_disk = JLBoost.predict(xgtree1, irisdisk);
iris.pred1 = pred1_disk;
iris.pred2 = JLBoost.predict(xgtree2, irisdisk);

# Target column read from the on-disk table, fetched once for both metrics.
is_setosa_disk = irisdisk[:, :is_setosa];

# AUC
AUC(-pred1_disk, is_setosa_disk);
# gini
gini(-pred1_disk, is_setosa_disk);

# clean up
rm("iris.jdf", force=true, recursive=true);