-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunction_k_fold_cv_result.R
46 lines (40 loc) · 1.36 KB
/
function_k_fold_cv_result.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
library(MASS) # for the example dataset
library(plyr) # for recoding data
library(ROCR) # for plotting roc
library(AUC) # for calculate AUC
library(class) # for knn
library(e1071) # for NB and SVM
library(rpart) # for decision tree
library(ada) # for adaboost
## Fix the RNG seed once, at the point where this script is run, so that a
## fresh run of the whole script gives exactly the same results every time.
## k.fold.cv.result() below shuffles the row index with sample(), so its fold
## assignment depends on this seed. The seed is not reset inside the function:
## repeated calls within one session continue the same RNG stream and will
## therefore draw different shuffles.
set.seed(12345)
## Run k-fold cross-validation and collect the out-of-fold predictions.
##
## Arguments:
##   dataset      data frame of observations; the actual outcome of each test
##                row is read from `dataset$Y`.
##   cl.name      classifier name; passed through to do.classification() and
##                echoed in the progress output.
##   k.fold       number of folds (default 10).
##   prob.cutoff  probability cut-off (default 0.5). Kept in the signature for
##                callers, but not used inside this function.
##
## Returns:
##   A data frame with column `probs` (the values returned by
##   do.classification() for every test fold, concatenated in fold order) and
##   column `actuals` (the corresponding `Y` values of the test rows).
##
## The rows are shuffled with sample(), so the fold assignment depends on the
## current state of the random number generator.
k.fold.cv.result <- function(dataset, cl.name, k.fold = 10,
                             prob.cutoff = 0.5) {
  n.obs <- nrow(dataset) # no. of observations

  ## shuffle the row index, then assign every row to a fold by taking the
  ## shuffled index modulo k.fold (fold ids run from 0 to k.fold - 1)
  fold.id <- sample(n.obs) %% k.fold

  probs <- NULL
  actuals <- NULL
  for (k in seq_len(k.fold)) {
    in.test <- fold.id == (k - 1)

    ## A fold has no test rows when k.fold exceeds the number of observations.
    ## Skip it: there is nothing to predict, and a negative index built from
    ## an empty test index would select zero training rows instead of all.
    if (!any(in.test)) {
      cat(k.fold, '-fold CV run', k, cl.name, ':',
          'skipped (empty test fold)', '\n', '\n')
      next
    }

    ## drop = FALSE keeps both pieces as data frames even for a single column
    train.set <- dataset[!in.test, , drop = FALSE]
    test.set <- dataset[in.test, , drop = FALSE]
    cat(k.fold, '-fold CV run', k, cl.name, ':',
        '#training:', nrow(train.set),
        '#testing:', nrow(test.set), '\n', '\n')

    ## prob is an array of probabilities for cases being positive
    prob <- do.classification(train.set, test.set, cl.name)
    actual <- test.set$Y

    ## k.fold is small, so plain c() accumulation is kept on purpose: it
    ## leaves the way values are combined across folds exactly as before
    probs <- c(probs, prob)
    actuals <- c(actuals, actual)
    ## you may compute other measures and store them in arrays
  }

  data.frame(probs, actuals)
}