-
Notifications
You must be signed in to change notification settings - Fork 3
/
run_chromVAR.R
87 lines (76 loc) · 3.28 KB
/
run_chromVAR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#source("https://bioconductor.org/biocLite.R")
#biocLite("GenomeInfoDbData")
#biocLite("GO.db")
#biocLite("BiocInstaller")
#biocLite("remotes")
# install gfortran 6.1: download OS X El Capitan gfortran 6.1 from https://gcc.gnu.org/wiki/GFortranBinaries#MacOS-11 , and then manually install it.
#BiocInstaller::biocLite("GreenleafLab/chromVAR")
#biocLite("JASPAR2016")
#devtools::install_github("GreenleafLab/motifmatchr")
#devtools::install_github("GreenleafLab/chromVARmotifs")
#biocLite("BSgenome.Hsapiens.UCSC.hg19")
# biocLite("BSgenome.Mmusculus.UCSC.mm10")
#setwd("/Users/lihuazhang/Documents/scAIMF_package/codes_for_A549")
#paths
args <- commandArgs()
baseName <- args[6]
library(chromVAR)
library(motifmatchr)
library(SummarizedExperiment)
infile0 <- paste(baseName, "species.csv", sep="/")
project_species <- read.table(file = infile0)
project_species <- as.vector(as.matrix(project_species))
{if (identical(project_species,"mouse"))
{
library(BSgenome.Mmusculus.UCSC.mm10)
}
else
{
library(BSgenome.Hsapiens.UCSC.hg19)
}}
# Setting multiprocessing options
library(BiocParallel)
register(MulticoreParam(2)) # Use 2 cores
peakfile <- paste(baseName, "loci_for_chromVAR.bed", sep="/") # filename of bed file
peaks <- getPeaks(peakfile, sort_peaks = FALSE)
counts_file <-paste(baseName,"data_for_chromVAR.txt", sep="/")
counts <- read.table(file = counts_file, sep = '\t',row.names=1,header=T)
my_counts_matrix <- as.matrix(counts)
example_counts <- SummarizedExperiment(assays = list(counts = my_counts_matrix),
rowRanges = peaks)
# In order to be able to use the filterSamples function, a “depth??? column with the total sequencing depth must be included in the colData in the SummarizedExperiment object.
example_counts@colData$depth <- as.matrix(colSums(my_counts_matrix))
{if (identical(project_species,"mouse"))
{
example_counts <- addGCBias(example_counts, genome = BSgenome.Mmusculus.UCSC.mm10) #
}
else
{
example_counts <- addGCBias(example_counts, genome = BSgenome.Hsapiens.UCSC.hg19) #
}}
expr_matrix <- assay(example_counts)
cellInfo <- as.data.frame(colData(example_counts, internal=TRUE))
geneInfo <- as.data.frame(rowData(example_counts, internal=TRUE))
# Filtering
counts_filtered <- filterPeaks(example_counts, min_fragments_per_peak = 1, non_overlapping = TRUE)
### Get motifs and what peaks contain motifs
{if (identical(project_species,"mouse"))
{
motifs <- getJasparMotifs(species = "Mus musculus")
motif_ix <- matchMotifs(motifs, counts_filtered, genome = BSgenome.Mmusculus.UCSC.mm10, out = "scores")
}
else
{
motifs <- getJasparMotifs(species = "Homo sapiens")
motif_ix <- matchMotifs(motifs, counts_filtered, genome = BSgenome.Hsapiens.UCSC.hg19, out = "scores")
}}
motif_matrix <- motifMatches(motif_ix)
#motif_matrix_score <- motifScores(motif_ix) # a matrix with the score of the high motif score within each range/sequence (score only reported if match present)
#motif_matrix_count <- motifCounts(motif_ix) # a matrix with the number of motif matches.
data_m <- as.data.frame(as.matrix(motif_matrix))
out1 <- paste(baseName, "chromVAR_motif_matrix_feature.txt", sep="/")
write.table(data_m,file = out1, sep = '\t')
TFs <- colnames(data_m)
TFs <-as.vector(TFs)
out2 <- paste(baseName, "chromVAR_motif_names.txt", sep="/")
write.table(TFs,file = out2,sep = '\t')