generated from openproblems-bio/task_template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5639642
commit c6093f6
Showing
12 changed files
with
474 additions
and
102 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
__merge__: ../../api/comp_method.yaml | ||
|
||
name: "alra" | ||
info: | ||
label: ALRA | ||
summary: "ALRA imputes missing values in scRNA-seq data by computing a rank-k approximation, thresholding by gene, and rescaling the matrix." | ||
description: | | ||
Adaptively-thresholded Low Rank Approximation (ALRA). | ||
ALRA is a method for imputation of missing values in single cell RNA-sequencing data, | ||
described in the preprint, "Zero-preserving imputation of scRNA-seq data using low-rank approximation" | ||
available [here](https://www.biorxiv.org/content/early/2018/08/22/397588). Given a | ||
scRNA-seq expression matrix, ALRA first computes its rank-k approximation using randomized SVD. | ||
Next, each row (gene) is thresholded by the magnitude of the most negative value of that gene. | ||
Finally, the matrix is rescaled. | ||
reference: "linderman2018zero" | ||
repository_url: "https://github.com/KlugerLab/ALRA" | ||
documentation_url: https://github.com/KlugerLab/ALRA/blob/master/README.md | ||
v1: | ||
path: openproblems/tasks/denoising/methods/alra.py | ||
commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 | ||
variants: | ||
alra: | ||
preferred_normalization: counts | ||
arguments: | ||
- name: "--norm" | ||
type: string | ||
choices: ["sqrt", "log"] | ||
default: "log" | ||
description: Normalization method | ||
resources: | ||
- type: r_script | ||
path: script.R | ||
engines: | ||
- type: docker | ||
image: ghcr.io/openproblems-bio/base_r:1.0.4 | ||
setup: | ||
- type: r | ||
cran: [ Matrix, rsvd ] | ||
github: KlugerLab/ALRA | ||
runners: | ||
- type: nextflow | ||
directives: | ||
label: [midtime, highmem, highcpu] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
cat(">> Loading dependencies\n") | ||
library(anndata, warn.conflicts = FALSE) | ||
library(ALRA, warn.conflicts = FALSE) | ||
|
||
## VIASH START
par <- list(
  input_train = "resources_test/denoising/pancreas/train.h5ad",
  norm = "log",
  output = "output.h5ad"
)
meta <- list(
  functionality_name = "alra"
)
## VIASH END

# Denoise the training counts with ALRA:
#   1. scale every cell (row) by its total and apply the transform chosen
#      through par$norm,
#   2. run alra() on the transformed matrix,
#   3. undo the transform and multiply every cell by its original total,
#   4. store the result as the `denoised` layer of a new AnnData file.

cat(">> Load input data\n")
input_train <- read_h5ad(par$input_train, backed = "r")

cat(">> Set normalization method\n")
# norm_fn is applied before ALRA; denorm_fn is its inverse, applied afterwards.
if (par$norm == "sqrt") {
  norm_fn <- sqrt
  denorm_fn <- function(x) x^2
} else if (par$norm == "log") {
  norm_fn <- log1p
  denorm_fn <- expm1
} else {
  stop("Unknown normalization method: ", par$norm)
}

cat(">> Normalize data\n")
data <- as.matrix(input_train$layers[["counts"]])
totalPerCell <- rowSums(data)
# A cell without any counts has a total of 0; dividing its row by that total
# would give 0 / 0 = NaN and hand NaN values to alra(). Divide such rows by 1
# instead so they simply stay all-zero.
scalePerCell <- totalPerCell
scalePerCell[scalePerCell == 0] <- 1
data <- sweep(data, 1, scalePerCell, "/")
data <- norm_fn(data)

cat(">> Run ALRA\n")
data <- alra(data)$A_norm_rank_k_cor_sc
data <- denorm_fn(data)
# Rescale with the real totals (not the guarded divisors), so that cells
# without counts are zero in the output.
data <- sweep(data, 1, totalPerCell, "*")

cat(">> Store output\n")
# Only the row/column indices of obs and var are kept (no columns selected).
output <- AnnData(
  layers = list(denoised = data),
  obs = input_train$obs[, c(), drop = FALSE],
  var = input_train$var[, c(), drop = FALSE],
  uns = list(
    dataset_id = input_train$uns[["dataset_id"]],
    method_id = meta$functionality_name
  )
)

cat(">> Write output to file\n")
output$write_h5ad(par$output, compression = "gzip")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
__merge__: ../../api/comp_method.yaml | ||
name: "dca" | ||
info: | ||
label: DCA | ||
summary: "A deep autoencoder with a ZINB loss function to address the dropout effect in count data" | ||
description: | | ||
Deep Count Autoencoder. | ||
Removes the dropout effect by taking the count structure, overdispersed nature and sparsity of the data into account | ||
using a deep autoencoder with a zero-inflated negative binomial (ZINB) loss function. | ||
reference: "eraslan2019single" | ||
documentation_url: "https://github.com/theislab/dca#readme" | ||
repository_url: "https://github.com/theislab/dca" | ||
v1: | ||
path: openproblems/tasks/denoising/methods/dca.py | ||
commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 | ||
variants: | ||
dca: | ||
preferred_normalization: counts | ||
arguments: | ||
- name: "--epochs" | ||
type: "integer" | ||
default: 300 | ||
description: "Number of total epochs in training" | ||
resources: | ||
- type: python_script | ||
path: script.py | ||
engines: | ||
- type: docker | ||
image: python:3.9 | ||
setup: | ||
- type: apt | ||
packages: procps | ||
- type: python | ||
packages: | ||
- anndata~=0.8.0 | ||
- scanpy | ||
- pyyaml | ||
- requests | ||
- jsonschema | ||
- "git+https://github.com/scottgigante-immunai/dca.git@patch-1" | ||
runners: | ||
- type: nextflow | ||
directives: | ||
label: [midtime, highmem, highcpu] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import anndata as ad | ||
from dca.api import dca | ||
|
||
## VIASH START
par = {
    'input_train': 'resources_test/denoising/pancreas/train.h5ad',
    'output': 'output_dca.h5ad',
    'epochs': 300,
}
meta = {
    'functionality_name': 'dca',
}
## VIASH END

# Denoise the training counts with DCA and write them out as the
# "denoised" layer of a fresh AnnData file.

print("load input data", flush=True)
train_adata = ad.read_h5ad(par['input_train'], backed="r")

print("Remove unneeded data", flush=True)
# Identifiers carried over into the output file.
run_info = {
    "dataset_id": train_adata.uns["dataset_id"],
    "method_id": meta["functionality_name"]
}
# The counts are placed in X because dca() is called on the object below and
# its result is read back from X; obs/var keep their index only.
output = ad.AnnData(
    X=train_adata.layers["counts"],
    obs=train_adata.obs[[]],
    var=train_adata.var[[]],
    uns=run_info
)

# The input object is no longer needed once the slim copy exists.
del train_adata

print("Run DCA", flush=True)
dca(output, epochs=par["epochs"])

print("Move output to correct location", flush=True)
# Expose the result as a layer and drop X from the object before writing.
output.layers["denoised"] = output.X
del output.X

print("Writing data", flush=True)
output.write_h5ad(par["output"], compression="gzip")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
__merge__: ../../api/comp_method.yaml | ||
|
||
name: "knn_smoothing" | ||
info: | ||
label: KNN Smoothing | ||
summary: "Iterative kNN-smoothing denoises scRNA-seq data by iteratively increasing the size of neighbourhoods for smoothing until a maximum k value is reached." | ||
description: "Iterative kNN-smoothing is a method to repair or denoise noisy scRNA-seq | ||
expression matrices. Given a scRNA-seq expression matrix, KNN-smoothing first | ||
applies initial normalisation and smoothing. Then, a chosen number of | ||
principal components is used to calculate Euclidean distances between cells. | ||
Minimally sized neighbourhoods are initially determined from these Euclidean | ||
distances, and expression profiles are shared between neighbouring cells. | ||
Then, the resultant smoothed matrix is used as input to the next step of | ||
smoothing, where the size (k) of the considered neighbourhoods is increased, | ||
leading to greater smoothing. This process continues until a chosen maximum k | ||
value has been reached, at which point the iteratively smoothed object is | ||
then optionally scaled to yield a final result." | ||
reference: "wagner2018knearest" | ||
documentation_url: "https://github.com/yanailab/knn-smoothing#readme" | ||
repository_url: "https://github.com/yanailab/knn-smoothing" | ||
v1: | ||
path: openproblems/tasks/denoising/methods/knn_smoothing.py | ||
commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 | ||
variants: | ||
knn_smoothing: | ||
preferred_normalization: counts | ||
resources: | ||
- type: python_script | ||
path: script.py | ||
|
||
engines: | ||
- type: docker | ||
image: ghcr.io/openproblems-bio/base_python:1.0.4 | ||
setup: | ||
- type: python | ||
packages: | ||
- scipy | ||
github: | ||
- scottgigante-immunai/knn-smoothing@python_package | ||
runners: | ||
- type: nextflow | ||
directives: | ||
label: [midtime, highmem, highcpu] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import knn_smooth | ||
import anndata as ad | ||
|
||
## VIASH START
# Placeholder values for running this script directly; the name matches the
# component so that a local run records a meaningful method_id.
par = {
    'input_train': 'resources_test/denoising/pancreas/train.h5ad',
    'output': 'output_knn.h5ad',
}
meta = {
    'functionality_name': 'knn_smoothing',
}
## VIASH END

# Denoise the training counts with kNN-smoothing and write them out as the
# "denoised" layer of a fresh AnnData file.

print("Load input data", flush=True)
input_train = ad.read_h5ad(par["input_train"], backed="r")

print("Remove unneeded data", flush=True)
# Dense float copy of the counts, transposed (variables x observations)
# before being handed to knn_smoothing().
X = input_train.layers["counts"].astype(float).transpose().toarray()

# Create output AnnData for later use: obs/var keep their index only, plus
# the two identifiers.
output = ad.AnnData(
    obs=input_train.obs[[]],
    var=input_train.var[[]],
    uns={
        "dataset_id": input_train.uns["dataset_id"],
        "method_id": meta["functionality_name"]
    }
)

# Everything needed has been copied out of the input object.
del input_train

print("Run KNN smoothing", flush=True)
# Transpose the result back to observations x variables.
X = knn_smooth.knn_smoothing(X, k=10).transpose()

print("Process data", flush=True)
output.layers["denoised"] = X

print("Writing data", flush=True)
output.write_h5ad(par["output"], compression="gzip")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
__merge__: ../../api/comp_method.yaml | ||
name: "magic" | ||
info: | ||
label: MAGIC | ||
summary: "MAGIC imputes and denoises scRNA-seq data that is noisy or dropout-prone." | ||
description: "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for | ||
imputation and denoising of noisy or dropout-prone single cell RNA-sequencing | ||
data. Given a normalised scRNA-seq expression matrix, it first calculates | ||
Euclidean distances between each pair of cells in the dataset, which is then | ||
augmented using a Gaussian kernel (function) and row-normalised to give a | ||
normalised affinity matrix. A t-step Markov process is then calculated by | ||
powering this affinity matrix t times. Finally, the powered affinity matrix | ||
is right-multiplied by the normalised data, causing the final imputed values | ||
to take the value of a per-gene average weighted by the affinities of cells. | ||
The resultant imputed matrix is then rescaled, to more closely match the | ||
magnitude of measurements in the normalised (input) matrix." | ||
reference: "van2018recovering" | ||
documentation_url: "https://github.com/KrishnaswamyLab/MAGIC#readme" | ||
repository_url: "https://github.com/KrishnaswamyLab/MAGIC" | ||
v1: | ||
path: openproblems/tasks/denoising/methods/magic.py | ||
commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 | ||
variants: | ||
magic: | ||
magic_approx: | ||
solver: approximate | ||
magic_knn_naive: | ||
norm: log | ||
decay: none | ||
t: 1 | ||
preferred_normalization: counts | ||
arguments: | ||
- name: "--solver" | ||
type: "string" | ||
choices: ["exact", "approximate"] | ||
default: "exact" | ||
description: Which solver to use. | ||
- name: "--norm" | ||
type: string | ||
choices: ["sqrt", "log"] | ||
default: "log" | ||
description: Normalization method | ||
- name: "--decay" | ||
type: integer | ||
default: 1 | ||
description: sets decay rate of kernel tails | ||
- name: "--t" | ||
type: integer | ||
default: 3 | ||
description: power to which the diffusion operator is powered | ||
resources: | ||
- type: python_script | ||
path: script.py | ||
engines: | ||
- type: docker | ||
image: ghcr.io/openproblems-bio/base_python:1.0.4 | ||
setup: | ||
- type: python | ||
pip: [scprep, magic-impute, scipy, scikit-learn<1.2] | ||
runners: | ||
- type: nextflow | ||
directives: | ||
label: [midtime, highmem, highcpu] |
Oops, something went wrong.