-
Notifications
You must be signed in to change notification settings - Fork 0
/
process-data.R
59 lines (35 loc) · 1.26 KB
/
process-data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
library(SingleCellExperiment)
library(Matrix)
# library(tidyverse)
library(scater)
library(scran)
set.seed(23523L)
counts <- readMM("data/counts.umi.txt.gz")
cells <- read.table("data/cells.umi.new.txt")
genes <- read.table("data/genes.umi.txt")
rownames(counts) <- genes$V1
colnames(counts) <- cells$V1
counts <- counts[, grepl("10x_v2", colnames(counts))]
metadata <- read_tsv('data/meta.txt')
metadata <- filter(metadata, NAME %in% colnames(counts))
counts <- counts[, metadata$NAME]
sce <- SingleCellExperiment(
assays = list(counts = counts),
colData = metadata
)
# Sort gene names ---------------------------------------------------------
symbol <- sapply(strsplit(rownames(sce), "_"), `[`, 2)
is_dup <- duplicated(symbol)
sce <- sce[!is_dup,]
rownames(sce) <- symbol[!is_dup]
# QC ----------------------------------------------------------------------
is_mito <- grepl("^MT-", rownames(sce))
sce <- addPerCellQC(sce, subsets=list(mito=is_mito))
plotColData(sce, x="detected", y="subsets_mito_percent", colour_by = "CellType")
## Already QCd
sce <- computeSumFactors(sce)
sce <- logNormCounts(sce)
sce <- runPCA(sce, ncomponents = 50)
sce <- runUMAP(sce)
# Save --------------------------------------------------------------------
saveRDS(sce, "data/sce.rds")