This repository has been archived by the owner on Dec 30, 2023. It is now read-only.
forked from HumanCellAtlas/hca-jamboree-how-many-cells
-
Notifications
You must be signed in to change notification settings - Fork 0
/
actual_separability.R
72 lines (63 loc) · 2.78 KB
/
actual_separability.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env Rscript
# This script takes a loom object and two cluster names and computes the actual
# separability
################################ Arguments #####################################
# args[1] Path to loom file
# args[2] Name of cluster 1
# args[3] Name of cluster 2
################################################################################
suppressMessages(library(Seurat))
suppressMessages(library(RANN))
suppressMessages(library(loomR))
################################ Fxn Definitions ###############################
ProcessData <- function(object) {
object <- FindVariableGenes(object, do.plot = FALSE, display.progress = FALSE)
[email protected] <- rownames(head([email protected], 1000))
object <- ScaleData(object, genes.use = [email protected],
display.progress = FALSE, check.for.norm = FALSE)
object <- RunPCA(object, pcs.compute = 30, do.print = FALSE)
return(GetCellEmbeddings(object, reduction.type = "pca", dims.use = 1:30))
}
ComputeSeparability <- function(input.data, cells.1, cells.2, k = 20) {
if (length(cells.1) < 3) {
stop("Fewer than 3 cells in the first group (cells.1)")
}
if (length(cells.2) < 3) {
stop("Fewer than 3 cells in the second group (cells.2)")
}
k <- min(c(k, length(cells.1), length(cells.2)))
tnn <- nn2(data = input.data[c(cells.1, cells.2), ], k = k + 1)
idx <- tnn$nn.idx[, -1]
rownames(idx) <- c(cells.1, cells.2)
correct_neighbors_c1 <- sapply(cells.1, function(x)
{
length(which(rownames(idx)[idx[x,]] %in% cells.1))
}
)
correct_neighbors_c2 <- sapply(cells.2, function(x)
{
length(which(rownames(idx)[idx[x,]] %in% cells.2))
}
)
return(mean(c(correct_neighbors_c1, correct_neighbors_c2)) / k)
}
################################################################################
args <- commandArgs(trailingOnly = TRUE)
lf <- connect(filename = args[1])
# Get UMI matrix and cluster information from the loom object
umi.matrix <- t(lf[["matrix"]][,])
rownames(umi.matrix) <- lf[["row_attrs/gene_names"]][]
colnames(umi.matrix) <- lf[["col_attrs/cell_names"]][]
clusters <- lf[["col_attrs/cluster"]][]
seurat.object <- CreateSeuratObject(raw.data = umi.matrix)
seurat.object <- NormalizeData(object = seurat.object, display.progress = FALSE)
processed.mat <- ProcessData(object = seurat.object)
seurat.object <- SetIdent(object = seurat.object,
cells.use = [email protected],
ident.use = clusters)
cells.1 <- WhichCells(object = seurat.object, ident = args[2])
cells.2 <- WhichCells(object = seurat.object, ident = args[3])
separability <- ComputeSeparability(input.data = processed.mat,
cells.1 = cells.1,
cells.2 = cells.2)
print(paste0("Separability: ", separability))