-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcheck-analysis-time.R
90 lines (69 loc) · 3.01 KB
/
check-analysis-time.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
library('ggplot2')
library('getopt')
## Specify parameters
spec <- matrix(c(
'experiment', 'e', 1, 'character', 'Experiment',
'run', 'r', 1, 'character', 'run name',
'help' , 'h', 0, 'logical', 'Display help'
), byrow=TRUE, ncol=5)
opt <- getopt(spec)
## if help was asked for print a friendly message
## and exit with a non-zero error code
if (!is.null(opt$help)) {
cat(getopt(spec, usage=TRUE))
q(status=1)
}
## Check experiment input
stopifnot(opt$experiment %in% c('brainspan'))
chrs <- paste0('chr', c(1:22, 'X', 'Y'))
study <- opt$experiment
run <- opt$run
timediff <- lapply(chrs, function(chr) {
info <- tryCatch(system(paste0('grep permutation *', study, '*', run, '*', chr, '.e*'), intern = TRUE), warning = function(w) { 'no data'})
if(info[1] == 'no data') {
info <- tryCatch(system(paste0('grep permutation ', file.path(study, 'derAnalysis', run, chr, 'logs'), '/*', chr, '.e*'), intern = TRUE), warning = function(w) { 'no data'})
}
if(info[1] == 'no data') return(NULL)
time <- strptime(gsub('([[:space:]]*calculate.*$)', '', info),
format = '%Y-%m-%d %H:%M:%S')
idx <- seq_len(length(info) - 1)
difftime(time[idx + 1], time[idx], units = 'mins')
})
names(timediff) <- chrs
## Organize time information
chrnum <- gsub('chr', '', chrs)
df <- data.frame(chr = factor(chrnum, levels = chrnum), mean = sapply(timediff, mean), sd = sapply(timediff, sd))
## Group by number of rounds per permutation given the number of chunks & cores used
if(!file.exists(file.path(study, 'derAnalysis', run, 'nChunks.Rdata'))) {
nChunks <- sapply(chrs, function(chr) {
if(!file.exists(file.path(study, 'derAnalysis', run, chr, 'coveragePrep.Rdata')))
return(NA)
load(file.path(study, 'derAnalysis', run, chr, 'coveragePrep.Rdata'))
max(prep$mclapply)
})
save(nChunks, file = file.path(study, 'derAnalysis', run, 'nChunks.Rdata'))
} else {
load(file.path(study, 'derAnalysis', run, 'nChunks.Rdata'))
}
if (study == 'brainspan') {
nCores <- c(40, 32, 27, rep(20, 15), 29, rep(20, 4), 2)
} else {
stop("Invalid 'study' value")
}
names(nCores) <- chrs
df$n <- sapply(timediff, length)
df$se <- df$sd / sqrt(df$n)
df$nChunks <- nChunks
df$nCores <- nCores
df$nRound <- factor(ceiling(nChunks / nCores))
## Print info
rownames(df) <- NULL
print(df)
## Make plot
pdf(file.path(study, 'derAnalysis', run, paste0('permuteTime-', study, '-', run, '.pdf')))
ggplot(df, aes(x = chr, y = mean, color = nRound)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.1) + geom_line() + geom_point() + ylab('Time per permutation (minutes)\nMean +- SE') + xlab('Chromosome') + ggtitle(paste('Time info for', study, run)) + scale_y_continuous(breaks=seq(0, ceiling(max(df$mean + df$se, na.rm = TRUE)), 1))
dev.off()
print('Expected total number of days per chr and days remaining')
days <- data.frame(chr = chrnum, total = round(df$mean * 1001 / 60 / 24, 1), remaining = round(df$mean * (1001 - df$n - 2 ) / 60 / 24, 1))
rownames(days) <- NULL
print(days)