Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

process multiomics workflow updated #15

Merged
merged 1 commit into from
Oct 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
File renamed without changes.
File renamed without changes.
29 changes: 29 additions & 0 deletions scripts/run_process_multiomics_dataset.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
# Writes the Nextflow params file for the process_multiomics workflow and then
# runs that workflow with the docker profile.
#
# Every path below is relative, so the script has to be started from the
# directory that contains target/, src/ and params/ (presumably the repository
# root -- confirm).

# Stop on the first failing command, unset variable or failed pipeline stage so
# Nextflow is never launched with a missing or half-written params file.
set -euo pipefail

RUN_ID="process_multiomics"
# resources_dir="s3://openproblems-data/resources/grn"
resources_dir="resources"
publish_dir="${resources_dir}/results/${RUN_ID}"
params_file="params/${RUN_ID}.yaml"

# The redirection below fails when the target directory does not exist yet.
mkdir -p "$(dirname "${params_file}")"

# The here-document delimiter is unquoted on purpose: the shell expands the
# variables while writing the file. Keys that belong to a param_list entry are
# indented under its "- id:" line so they stay part of that entry.
cat > "${params_file}" << HERE
param_list:
  - id: process_multiomics
    multiome_counts: ${resources_dir}/datasets_raw/multiome_counts.h5ad

output_state: "state.yaml"
publish_dir: "${publish_dir}"
HERE


# ./tw-windows-x86_64.exe launch https://github.com/openproblems-bio/task_grn_inference.git `
# --revision build/main --pull-latest `
# --main-script target/nextflow/workflows/process_multiomics/main.nf `
# --workspace 53907369739130 --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
# --params-file ./params/process_multiomics.yaml `
# --config src/common/nextflow_helpers/labels_tw.config


nextflow run . \
  -main-script target/nextflow/workflows/process_multiomics/main.nf \
  -profile docker -with-trace -c src/common/nextflow_helpers/labels_ci.config \
  -params-file "${params_file}"
28 changes: 28 additions & 0 deletions scripts/run_process_perturbation_dataset.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
# Writes the Nextflow params file for the process_perturbation workflow and
# then runs that workflow with the docker profile.
#
# Input data is read from, and results are published to, the S3 location held
# in resources_dir. The params file, main script and config paths are relative
# to the current working directory.

# Stop on the first failing command, unset variable or failed pipeline stage so
# Nextflow is never launched with a missing or half-written params file.
set -euo pipefail

RUN_ID="process_perturbation"
# No trailing slash: the value is joined with "/..." below, and a trailing
# slash would produce "grn//results" and "grn//datasets_raw" paths.
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="${resources_dir}/results/${RUN_ID}"
params_file="params/${RUN_ID}.yaml"

# The redirection below fails when the target directory does not exist yet.
mkdir -p "$(dirname "${params_file}")"

# The here-document delimiter is unquoted on purpose: the shell expands the
# variables while writing the file. Keys that belong to a param_list entry are
# indented under its "- id:" line so they stay part of that entry.
cat > "${params_file}" << HERE
param_list:
  - id: test_process_perturbation
    perturbation_counts: ${resources_dir}/datasets_raw/perturbation_counts.h5ad

output_state: "state.yaml"
publish_dir: "${publish_dir}"
HERE


# ./tw-windows-x86_64.exe launch https://github.com/openproblems-bio/task_grn_inference.git `
# --revision build/main --pull-latest `
# --main-script target/nextflow/workflows/process_perturbation/main.nf `
# --workspace 53907369739130 --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
# --params-file ./params/process_perturbation.yaml `
# --config src/common/nextflow_helpers/labels_tw.config


nextflow run . \
  -main-script target/nextflow/workflows/process_perturbation/main.nf \
  -profile docker -with-trace -c src/common/nextflow_helpers/labels_ci.config \
  -params-file "${params_file}"
28 changes: 0 additions & 28 deletions scripts/run_process_perturbation_tw.sh

This file was deleted.

1 change: 1 addition & 0 deletions src/methods/multi_omics/figr/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ dir.create(par$temp_dir, recursive = TRUE, showWarnings = TRUE)
atac = readRDS(par$multiomics_atac_r)
rna = readRDS(par$multiomics_rna_r)


colnames(atac) <- gsub("-", "", colnames(atac))
colnames(rna) <- gsub("-", "", colnames(rna))

Expand Down
40 changes: 0 additions & 40 deletions src/process_data/multiomics/batch_correction/script.py

This file was deleted.

15 changes: 2 additions & 13 deletions src/process_data/multiomics/format_data/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,15 @@ functionality:
arguments:
- name: --multiome_counts
type: file
required: false
required: true
direction: input
example: resources/datasets_raw/multiome_counts.h5ad

- name: --multiomics_rna
type: file
required: false
direction: output
example: resources/grn-benchmark/multiomics_rna.h5ad
- name: --multiomics_rna_d0
type: file
required: false
direction: output
example: resources/grn-benchmark/multiomics_rna_d0.h5ad

- name: --multiomics_rna_d0_hvg
type: file
required: false
direction: output
example: resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad

- name: --multiomics_atac
type: file
required: false
Expand Down
27 changes: 9 additions & 18 deletions src/process_data/multiomics/format_data/script.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import anndata as ad
import scanpy as sc
import numpy as np
## VIASH START
par = {
# 'multiome_counts': 'resources/datasets_raw/multiome_counts.h5ad',
Expand All @@ -26,22 +27,14 @@
multiomics_rna = multiomics[:,multiomics.var.feature_types=='Gene Expression']
multiomics_rna.var = multiomics_rna.var[['gene_ids', 'interval']]

def high_coverage(adata):
threshold = 0.1
mask = adata.X!=0
mask_obs = (np.sum(mask, axis=1).A.flatten()/mask.shape[1])>threshold
mask_var = (np.sum(mask, axis=0).A.flatten()/mask.shape[0])>threshold
adata.obs['high_coverage'] = mask_obs
adata.var['high_coverage'] = mask_var
high_coverage(multiomics_rna)

# hvgs
var = sc.pp.highly_variable_genes(multiomics_rna, flavor='seurat_v3', n_top_genes=7000, inplace=False)
multiomics_rna.var['highly_variable'] = var.highly_variable

# subset to donor 0
multiomics_rna_d0 = multiomics_rna[multiomics_rna.obs.donor_id=='donor_0', :]
multiomics_rna_d0_hvg = multiomics_rna[multiomics_rna.obs.donor_id=='donor_0', multiomics_rna.var.highly_variable]
# def high_coverage(adata):
# threshold = 0.1
# mask = adata.X!=0
# mask_obs = (np.sum(mask, axis=1).A.flatten()/mask.shape[1])>threshold
# mask_var = (np.sum(mask, axis=0).A.flatten()/mask.shape[0])>threshold
# adata.obs['high_coverage'] = mask_obs
# adata.var['high_coverage'] = mask_var
# high_coverage(multiomics_rna)
#------ ATAC
multiomics_atac = multiomics[:,multiomics.var.feature_types=='Peaks']
multiomics_atac.var = multiomics_atac.var[[]]
Expand All @@ -62,6 +55,4 @@ def high_coverage(adata):
multiomics_atac.obs['donor_id'] = multiomics_atac.obs['donor_id'].map(donor_map)

multiomics_rna.write(par['multiomics_rna'])
multiomics_rna_h0.write(par['multiomics_rna_h0'])
multiomics_rna_h0_hvg.write(par['multiomics_rna_h0_hvg'])
multiomics_atac.write(par['multiomics_atac'])
33 changes: 14 additions & 19 deletions src/process_data/multiomics/format_resources_r/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,50 +8,45 @@ functionality:
arguments:
- name: --rna_matrix
type: file
required: false
required: true
direction: input
default: output/scRNA/X_matrix.mtx

example: output/scRNA/X_matrix.mtx
- name: --atac_matrix
type: file
required: false
required: true
direction: input
default: output/scATAC/X_matrix.mtx

example: output/scATAC/X_matrix.mtx
- name: --rna_gene_annot
type: file
required: false
required: true
direction: input
default: output/scRNA/annotation_gene.csv

example: output/scRNA/annotation_gene.csv
- name: --rna_cell_annot
type: file
required: false
required: true
direction: input
default: output/scRNA/annotation_cell.csv

example: output/scRNA/annotation_cell.csv
- name: --atac_peak_annot
type: file
required: false
required: true
direction: input
default: output/scATAC/annotation_gene.csv

example: output/scATAC/annotation_gene.csv
- name: --atac_cell_annot
type: file
required: false
required: true
direction: input
default: output/scATAC/annotation_cell.csv
example: output/scATAC/annotation_cell.csv

- name: --rna_rds
type: file
required: false
direction: output
default: resources/grn-benchmark/multiomics_r/rna.rds
example: resources/grn-benchmark/multiomics_r/rna.rds
- name: --atac_rds
type: file
required: false
direction: output
default: resources/grn-benchmark/multiomics_r/atac.rds
example: resources/grn-benchmark/multiomics_r/atac.rds



Expand Down
1 change: 1 addition & 0 deletions src/process_data/multiomics/format_resources_r/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ annotation_peak_filtered <- annotation_peak[filter_indices, ]
# Filter the rows in X
X_filtered <- X[filter_indices, ]


# Create the SummarizedExperiment object with the filtered data
atac <- SummarizedExperiment(assays = list(counts = X_filtered),
rowRanges = GRanges(annotation_peak_filtered$seqname,
Expand Down
21 changes: 10 additions & 11 deletions src/process_data/multiomics/multiome_matrix/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,51 +8,50 @@ functionality:
arguments:
- name: --multiomics_rna
type: file
required: false
required: true
direction: input
default: resources/grn-benchmark/multiomics_rna.h5ad
example: resources/grn-benchmark/multiomics_rna.h5ad

- name: --multiomics_atac
type: file
required: false
required: true
direction: input
default: resources/grn-benchmark/multiomics_atac.h5ad
example: resources/grn-benchmark/multiomics_atac.h5ad

- name: --rna_matrix
type: file
required: false
direction: output
default: output/scRNA/X_matrix.mtx

example: output/scRNA/X_matrix.mtx
- name: --atac_matrix
type: file
required: false
direction: output
default: output/scATAC/X_matrix.mtx
example: output/scATAC/X_matrix.mtx

- name: --rna_gene_annot
type: file
required: false
direction: output
default: output/scRNA/annotation_gene.csv
example: output/scRNA/annotation_gene.csv

- name: --rna_cell_annot
type: file
required: false
direction: output
default: output/scRNA/annotation_cell.csv
example: output/scRNA/annotation_cell.csv

- name: --atac_peak_annot
type: file
required: false
direction: output
default: output/scATAC/annotation_gene.csv
example: output/scATAC/annotation_gene.csv

- name: --atac_cell_annot
type: file
required: false
direction: output
default: output/scATAC/annotation_cell.csv
example: output/scATAC/annotation_cell.csv
resources:
- type: python_script
path: script.py
Expand Down
45 changes: 45 additions & 0 deletions src/process_data/multiomics/subset_hvg/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

# Viash component config for multiomics/subset_hvg.
# Takes the RNA and ATAC multiomics files as input and declares two output
# files: multiomics_rna_d0_hvg and multiomics_atac_d0.
functionality:
  name: subset_hvg
  namespace: "multiomics"
  info:
    label: subset_hvg
    summary: "Receives multiomics data and subsets it for hvg"
  arguments:
    # Inputs: both files are mandatory.
    - name: --multiomics_rna
      type: file
      required: true
      direction: input
      example: resources/grn-benchmark/multiomics_rna.h5ad
    - name: --multiomics_atac
      type: file
      required: true
      direction: input
      example: resources/grn-benchmark/multiomics_atac.h5ad

    # Outputs: declared optional.
    - name: --multiomics_rna_d0_hvg
      type: file
      required: false
      direction: output
      example: resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad
    - name: --multiomics_atac_d0
      type: file
      required: false
      direction: output
      example: resources/grn-benchmark/multiomics_atac_d0.h5ad

  resources:
    - type: python_script
      path: script.py
platforms:
  - type: docker
    image: ghcr.io/openproblems-bio/base_python:1.0.4
    setup:
      # NOTE(review): presumably required by the HVG selection in script.py;
      # confirm against the script.
      - type: python
        packages: [ scikit-misc ]

  - type: native
  - type: nextflow
    directives:
      label: [midtime,midmem,midcpu]
Loading
Loading