-
Notifications
You must be signed in to change notification settings - Fork 0
/
permutationTest.R
122 lines (110 loc) · 3.34 KB
/
permutationTest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
permutationTest <- function(
## INPUT
NNM,
cell_labels,
alternative = 'greater',
nPerms = 1E3,
seed = 12345,
verbose = TRUE
){
alternative = tolower(alternative)
supported_alts <- c('greater', 'lower', 'two-tailed')
if(!(alternative %in% supported_alts)){
cat('\n\nERROR: UNSUPPORTED ALTERNATIVE! ONLY ACCEPTING:')
for(i in 1:length(supported_alts)){ cat(paste0('\n', alternative, '...'))}
break
}
if(is.null(dim(NNM))){
NNMatrixVector <- NNM
obs <- by(NNMatrixVector, cell_labels, sum)
obs <- setNames(as.numeric(obs), names(obs))
set.seed(seed)
nulldist <- list()
for(px in 1:nPerms){
if(verbose){
if(px==1 | px %% 100 == 0){
cat(paste0(px, ' of ', nPerms, '...'))
}
}
null_labs <- sample(cell_labels, size = length(cell_labels), replace = F)
expx <- by(NNMatrixVector, null_labs, sum)
expx <- setNames(as.numeric(expx), names(expx))
nulldist[[px]] <- expx
}
nulldist <- do.call(cbind, nulldist)
mean_exp <- apply(nulldist, 1, mean)
sd_exp <- apply(nulldist, 1, sd)
pvals <- sapply(1:nrow(nulldist), function(j){
pval = 1 - sum( obs[j] < nulldist[j,]) / nPerms
if(alternative == 'greater'){
pval = 1 - pval
}
if(alternative == 'two-tailed'){
pval = pmin(1 - pval, pval)
pval = pval * 2
}
return(pval)
})
OUT <- data.frame(
'OBS' = obs,
'OBS PERCENT' = round(100 * obs / sum(obs), digits=2),
'MEAN EXP' = mean_exp,
'SD EXP' = sd_exp,
'Z' = (obs - mean_exp)/sd_exp,
'PVAL' = pvals,
'SIG' = c('***', '**', '*', '.', '')[as.integer(cut(pvals, c(1, 0.1, 0.05, 0.01, 0.001, -1)))]
)
return(OUT)
}
start_time <- Sys.time()
l_o <- list()
for (c in 1:ncol(NNM)){
NNMatrixVector <- NNM[,c]
obs <- by(NNMatrixVector, cell_labels, sum)
obs <- setNames(as.numeric(obs), names(obs))
set.seed(seed)
nulldist <- list()
for(px in 1:nPerms){
if(verbose){
if(px==1 | px %% 100 == 0){
cat(paste0(px, ' of ', nPerms, '...'))
}
}
null_labs <- sample(cell_labels, size = length(cell_labels), replace = F)
expx <- by(NNMatrixVector, null_labs, sum)
expx <- setNames(as.numeric(expx), names(expx))
nulldist[[px]] <- expx
}
nulldist <- do.call(cbind, nulldist)
mean_exp <- apply(nulldist, 1, mean)
sd_exp <- apply(nulldist, 1, sd)
pvals <- sapply(1:nrow(nulldist), function(j){
pval = 1 - sum( obs[j] < nulldist[j,]) / nPerms
if(alternative == 'greater'){
pval = 1 - pval
}
if(alternative == 'two-tailed'){
pval = pmin(1 - pval, pval)
pval = pval * 2
}
return(pval)
})
OUT <- data.frame(
'OBS' = obs,
'OBS PERCENT' = round(100 * obs / sum(obs), digits=2),
'MEAN EXP' = mean_exp,
'SD EXP' = sd_exp,
'Z' = (obs - mean_exp)/sd_exp,
'PVAL' = pvals,
'SIG' = c('***', '**', '*', '.', '')[as.integer(cut(pvals, c(1, 0.1, 0.05, 0.01, 0.001, -1)))]
)
OUT$CTA <- names(obs)
OUT$CTB <- colnames(NNM)[c]
l_o[[c]] <- OUT
}
combined_df <- do.call("rbind", l_o)
end_time <- Sys.time()
time_elapsed <- end_time - start_time
print(paste("Total time elapsed:", time_elapsed))
return(combined_df)
}