utils.r
# Confusion matrix (contingency table) of true labels vs. predicted labels.
ConfusionMatrix <- function(y_pred, y_true) {
  Confusion_Mat <- table(y_true, y_pred)
  return(Confusion_Mat)
}
# Confusion matrix flattened into a long data frame with columns
# y_true, y_pred and Freq (one row per label combination).
ConfusionDF <- function(y_pred, y_true) {
  Confusion_DF <- transform(as.data.frame(ConfusionMatrix(y_pred, y_true)),
                            y_true = as.character(y_true),
                            y_pred = as.character(y_pred),
                            Freq = as.integer(Freq))
  return(Confusion_DF)
}
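# Illustrative sketch (not part of the original utilities): the toy vectors
# below are made up only to show the shape of ConfusionDF's output. Wrapped in
# `if (FALSE)` so that sourcing this file does not execute it.
if (FALSE) {
  y_true <- c("a", "a", "b")
  y_pred <- c("a", "b", "b")
  ConfusionMatrix(y_pred, y_true)
  # 2 x 2 table: rows are y_true, columns are y_pred
  ConfusionDF(y_pred, y_true)
  # Long format with columns y_true, y_pred, Freq; zero-count combinations of
  # the observed labels (here ("b", "a")) are kept as rows with Freq = 0.
}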
# Micro-averaged precision: TP and FP are accumulated over all labels before
# the ratio is taken.
Precision_micro <- function(y_true, y_pred, labels = NULL) {
  Confusion_DF <- ConfusionDF(y_pred, y_true)
  # Defaulting to the labels seen in y_true/y_pred is not bulletproof: labels
  # that existed in the training set but appear in neither the test ground
  # truth nor the predictions are silently ignored.
  if (is.null(labels)) labels <- unique(c(y_true, y_pred))
  TP <- c()
  FP <- c()
  for (i in seq_along(labels)) {
    positive <- labels[i]
    # A label may never be predicted (missing from y_pred) even though it
    # exists in y_true. ConfusionDF then has no rows for it, so the simplified
    # code below crashes:
    # TP[i] <- as.integer(Confusion_DF[which(Confusion_DF$y_true == positive & Confusion_DF$y_pred == positive), "Freq"])
    # FP[i] <- as.integer(sum(Confusion_DF[which(Confusion_DF$y_true != positive & Confusion_DF$y_pred == positive), "Freq"]))
    # Workaround (ConfusionDF is left unchanged, since it is unclear whether
    # its current behaviour is a feature or a bug):
    tmp <- Confusion_DF[which(Confusion_DF$y_true == positive & Confusion_DF$y_pred == positive), "Freq"]
    TP[i] <- if (length(tmp) == 0) 0 else as.integer(tmp)
    tmp <- Confusion_DF[which(Confusion_DF$y_true != positive & Confusion_DF$y_pred == positive), "Freq"]
    FP[i] <- if (length(tmp) == 0) 0 else as.integer(sum(tmp))
  }
  Precision_micro <- sum(TP) / (sum(TP) + sum(FP))
  return(Precision_micro)
}
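# Illustrative sketch (not part of the original utilities): a made-up case
# where label "b" occurs in y_true but is never predicted, which is exactly
# the situation the workaround above guards against. Without it, the indexing
# returns a zero-length vector and the assignment to TP[i] fails.
if (FALSE) {
  y_true <- c("a", "b", "c", "c")
  y_pred <- c("a", "a", "a", "c")
  Precision_micro(y_true, y_pred)
  # sum(TP) = 2 (one "a", one "c"), sum(FP) = 2 (the two wrong "a" predictions),
  # so the micro-averaged precision is 2 / 4 = 0.5.
}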
# Micro-averaged recall: TP and FN are accumulated over all labels before the
# ratio is taken.
Recall_micro <- function(y_true, y_pred, labels = NULL) {
  Confusion_DF <- ConfusionDF(y_pred, y_true)
  # Same caveat as in Precision_micro: labels that appear in neither y_true
  # nor y_pred are silently ignored unless passed explicitly.
  if (is.null(labels)) labels <- unique(c(y_true, y_pred))
  TP <- c()
  FN <- c()
  for (i in seq_along(labels)) {
    positive <- labels[i]
    # Short version, commented out for the same reason as in Precision_micro
    # (labels missing from the confusion table make the indexing return a
    # zero-length result):
    # TP[i] <- as.integer(Confusion_DF[which(Confusion_DF$y_true == positive & Confusion_DF$y_pred == positive), "Freq"])
    # FN[i] <- as.integer(sum(Confusion_DF[which(Confusion_DF$y_true == positive & Confusion_DF$y_pred != positive), "Freq"]))
    # Workaround:
    tmp <- Confusion_DF[which(Confusion_DF$y_true == positive & Confusion_DF$y_pred == positive), "Freq"]
    TP[i] <- if (length(tmp) == 0) 0 else as.integer(tmp)
    tmp <- Confusion_DF[which(Confusion_DF$y_true == positive & Confusion_DF$y_pred != positive), "Freq"]
    FN[i] <- if (length(tmp) == 0) 0 else as.integer(sum(tmp))
  }
  Recall_micro <- sum(TP) / (sum(TP) + sum(FN))
  return(Recall_micro)
}
# Micro-averaged F1 score: harmonic mean of micro precision and micro recall.
F1_Score_micro <- function(y_true, y_pred, labels = NULL) {
  # Same caveat as above: labels missing from both y_true and y_pred are ignored.
  if (is.null(labels)) labels <- unique(c(y_true, y_pred))
  Precision <- Precision_micro(y_true, y_pred, labels)
  Recall <- Recall_micro(y_true, y_pred, labels)
  F1_Score_micro <- 2 * (Precision * Recall) / (Precision + Recall)
  return(F1_Score_micro)
}
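# Illustrative sketch (not part of the original utilities): an end-to-end run
# on made-up predictions. For single-label classification over the default
# label set, micro precision, micro recall and micro F1 all reduce to plain
# accuracy, which makes a convenient sanity check.
if (FALSE) {
  y_true <- c("a", "a", "b", "b", "c")
  y_pred <- c("a", "b", "b", "c", "c")
  Precision_micro(y_true, y_pred)   # 0.6
  Recall_micro(y_true, y_pred)      # 0.6
  F1_Score_micro(y_true, y_pred)    # 0.6
  mean(y_true == y_pred)            # 0.6, i.e. accuracy
}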