-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstep1-fullCoverage.R
94 lines (70 loc) · 2.94 KB
/
step1-fullCoverage.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
## Load the data without a filter, save it, then filter it for derfinder processing steps
## Load libraries
library('getopt')
## Available at http://www.bioconductor.org/packages/release/bioc/html/derfinder.html
library('derfinder')
library('BiocParallel')
library('devtools')
## Specify parameters
spec <- matrix(c(
'datadir', 'd', 1, 'character', 'Data directory, matched with rawFiles(datadir)',
'pattern', 'p', 1, 'character', 'Sample pattern',
'cutoff', 'c', 1, 'numeric', 'Filtering cutoff used',
'mcores', 'm', 1, 'integer', 'Number of cores',
'fileStyle', 'f', 2, 'character', 'FileStyle used for naming the chromosomes',
'help' , 'h', 0, 'logical', 'Display help'
), byrow=TRUE, ncol=5)
opt <- getopt(spec)
## if help was asked for print a friendly message
## and exit with a non-zero error code
if (!is.null(opt$help)) {
cat(getopt(spec, usage=TRUE))
q(status=1)
}
## Default value for fileStyle
if (is.null(opt$fileStyle)) opt$fileStyle <- 'UCSC'
## Normalize filtered coverage
totalMapped <- NULL
targetSize <- 80e6
## Identify the data directories
load("/users/ajaffe/Lieber/Projects/Grants/Coverage_R01/brainspan/brainspan_phenotype.rda")
bad_samples <- which(rownames(pdSpan) %in% c('216', '218', '219'))
pdSpan[bad_samples, ]
if(nrow(pdSpan) == 487) pdSpan <- pdSpan[-bad_samples, ]
stopifnot(nrow(pdSpan) == 484)
files <- pdSpan$wig
names(files) <- pdSpan$lab
## Load the coverage information without filtering
chrnums <- c(1:22, 'X', 'Y')
fullCov <- fullCoverage(files = files, chrs = chrnums, mc.cores = opt$mcores, fileStyle = opt$fileStyle, outputs = 'auto')
message(paste(Sys.time(), 'Saving the full (unfiltered) coverage data'))
save(fullCov, file='fullCov.Rdata')
rm(fullCov)
fullCov_files <- as.list(dir(pattern = 'chr'))
names(fullCov_files) <- sapply(fullCov_files, function(x) gsub('CovInfo.Rdata', '', x))
## Filter the data and save it by chr
myFilt <- function(chr, rawData_file, cutoff, totalMapped = NULL, targetSize = 80e6) {
library('derfinder')
## Load raw data
message(paste(Sys.time(), 'Loading raw file', rawData_file, 'for', chr))
load(rawData_file)
rawData <- get(paste0(chr, 'CovInfo'))
## Filter the data
message(paste(Sys.time(), 'Filtering chromosome', chr))
res <- filterData(data = rawData$coverage, cutoff = cutoff, index = NULL,
totalMapped = totalMapped, targetSize = targetSize)
## Save it in a unified name format
varname <- paste0(chr, 'CovInfo')
assign(varname, res)
output <- paste0(varname, '-filtered.Rdata')
## Save the filtered data
save(list = varname, file = output, compress='gzip')
## Finish
return(invisible(NULL))
}
message(paste(Sys.time(), 'Filtering and saving the data with cutoff', opt$cutoff))
filteredCov <- bpmapply(myFilt, names(fullCov_files), fullCov_files, BPPARAM = SnowParam(workers = opt$mcores), MoreArgs = list(cutoff = opt$cutoff, totalMapped = totalMapped, targetSize = targetSize))
## Done!
proc.time()
options(width = 120)
session_info()