-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* adds decoupler_pathway_inference and its Galaxy wrapper --------- Co-authored-by: Nicola Soranzo <[email protected]> Co-authored-by: Pablo Moreno <[email protected]>
- Loading branch information
1 parent
0264c35
commit 1034a45
Showing
5 changed files
with
419 additions
and
0 deletions.
There are no files selected for viewing
132 changes: 132 additions & 0 deletions
132
tools/tertiary-analysis/decoupler/decoupler_pathway_inference.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
# import the necessary packages | ||
import argparse | ||
|
||
import anndata as ad | ||
import decoupler as dc | ||
import pandas as pd | ||
|
||
# Command-line interface of the pathway-inference script.
parser = argparse.ArgumentParser()

# AnnData input file option
parser.add_argument(
    "-i", "--input_anndata", help="AnnData input file", required=True
)

# network input file option (read later as a tab-separated table)
parser.add_argument(
    "-n", "--input_network", help="Network input file", required=True
)

# output file prefix; ".tsv" is appended when the estimates table is written
parser.add_argument(
    "-o", "--output",
    help="output files prefix",
    default=None,
)

# optional path to save the Activities AnnData file
parser.add_argument(
    "-a", "--activities_path", help="Path to save Activities AnnData file", default=None
)

# Column name in net with source nodes
parser.add_argument(
    "-s", "--source", help="Column name in net with source nodes.", default="source"
)

# Column name in net with target nodes
parser.add_argument(
    "-t", "--target", help="Column name in net with target nodes.", default="target"
)

# Column name in net with weights.
parser.add_argument(
    "-w", "--weight", help="Column name in net with weights.", default="weight"
)

# boolean flag for use_raw
parser.add_argument(
    "--use_raw", action="store_true", default=False, help="Whether to use the raw part of the AnnData object"
)

# minimum number of targets per source (min_n)
parser.add_argument(
    "--min_n", help="Minimum of targets per source. If less, sources are removed.", default=5, type=int
)

# Activity inference method. The original combined default="mlm" with
# required=True, which made the default unreachable; the option is now
# optional so the default applies. `choices` makes argparse reject any
# method the script does not implement instead of accepting it silently.
parser.add_argument(
    "-m", "--method",
    help="Activity inference method",
    choices=["mlm", "ulm"],
    default="mlm",
)
args = parser.parse_args()
|
||
# The output prefix is mandatory: fail early when -o/--output was not given.
if args.output is None:
    raise ValueError("Please specify either -o or --output")

# read in the AnnData input file
adata = ad.read_h5ad(args.input_anndata)

# read in the network input file (tab-separated)
network = pd.read_csv(args.input_network, sep='\t')

# Every column named on the command line must exist in the network table.
# Collect the missing ones so the error says exactly what is wrong.
missing_columns = [
    column
    for column in (args.source, args.target, args.weight)
    if column not in network.columns
]
if missing_columns:
    raise ValueError(
        "Column(s) not present in the network: "
        + ", ".join(repr(column) for column in missing_columns)
        + f" (available columns: {list(network.columns)})"
    )
|
||
# Map each supported method name to the decoupler function that runs it.
inference_functions = {
    "mlm": dc.run_mlm,
    "ulm": dc.run_ulm,
}

# An unknown method used to fall through both branches and exit successfully
# without producing any output; fail loudly instead.
if args.method not in inference_functions:
    raise ValueError(
        f"Unsupported method '{args.method}', expected one of: "
        + ", ".join(sorted(inference_functions))
    )

# Run the selected method; its results are read back from adata.obsm below.
inference_functions[args.method](
    mat=adata,
    net=network,
    source=args.source,
    target=args.target,
    weight=args.weight,
    verbose=True,
    min_n=args.min_n,
    use_raw=args.use_raw
)

# obsm keys holding the estimates and p-values of the selected method
estimate_key = f"{args.method}_estimate"
pvals_key = f"{args.method}_pvals"

if args.output is not None:
    # combine the estimates and their p-values side by side
    combined_df = pd.concat([adata.obsm[estimate_key], adata.obsm[pvals_key]], axis=1)

    # Save the combined dataframe to a file
    combined_df.to_csv(args.output + ".tsv", sep="\t")

# if args.activities_path is specified, generate the activities AnnData and
# save the AnnData object to the specified path
if args.activities_path is not None:
    acts = dc.get_acts(adata, obsm_key=estimate_key)
    acts.write_h5ad(args.activities_path)
129 changes: 129 additions & 0 deletions
129
tools/tertiary-analysis/decoupler/decoupler_pathway_inference.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
<tool id="decoupler_pathway_inference" name="Decoupler Pathway Inference" version="1.4.0+galaxy0" profile="20.05" license="MIT"> | ||
<description> | ||
of functional genesets/pathways for scRNA-seq data. | ||
</description> | ||
<requirements> | ||
<requirement type="package" version="1.4.0">decoupler</requirement> | ||
</requirements> | ||
<!-- Every user-supplied value is single-quoted so it reaches the script as
     one shell argument. The column-name options are only passed when they
     are non-empty, so the script's own defaults apply for empty fields, as
     the parameter help promises. -->
<command><![CDATA[
python '$__tool_directory__/decoupler_pathway_inference.py'
    -i '$input_anndata'
    -n '$input_network_file'
    --min_n '$min_n'
    --method '$method'
    $use_raw
    #if str($source).strip()
    --source '$source'
    #end if
    #if str($target).strip()
    --target '$target'
    #end if
    #if str($weight).strip()
    --weight '$weight'
    #end if
    --output "inference"
    $write_activities_path
]]></command>
<inputs> | ||
<param name="input_anndata" type="data" format="h5ad" label="Input AnnData file" /> | ||
<param name="input_network_file" type="data" format="tabular" label="Input Network file" help="Tabular file with columns Source, Target and Weight. A source gene/pathway regulates/contains a target gene, weights can be either positive or negative. The source element needs to be part of the network, the target is a gene in the network and in the dataset" /> | ||
<param name="min_n" type="integer" min="0" value="5" label="Minimum targets per source." help="If targets are less than minimum, sources are removed" /> | ||
<param name="method" type="select" label="Activity inference method"> | ||
<option value="mlm" selected="true">Multivariate linear model (MLM)</option> | ||
<option value="ulm">Univariate linear model (ULM)</option> | ||
</param> | ||
<param name="use_raw" type="boolean" truevalue="--use_raw" falsevalue="" checked="false" label="Use the raw part of the AnnData object" /> | ||
<param name="write_activities_path" type="boolean" truevalue="--activities_path anndata_activities_path.h5ad" falsevalue="" checked="true" label="Write the activities AnnData object (contains the MLM/ULM activity results for each pathway and each cell in the main matrix, it is not a replacement of the original AnnData provided as input)." /> | ||
<param name="source" type="text" value='source' label="Column name in network with source nodes." help="If empty then default is 'source' is used." /> | ||
<param name="target" type="text" value='target' label="Column name in network with target nodes." help="If empty then default is 'target' is used." /> | ||
<param name="weight" type="text" value='weight' label="Column name in network with weight." help="If empty then default is 'weight' is used." /> | ||
</inputs> | ||
<outputs> | ||
<data name="output_ad" format="h5ad" from_work_dir="anndata_activities_path.h5ad" label="${tool.name} on ${on_string}: Regulators/Pathways activity AnnData file"> | ||
<filter>write_activities_path</filter> | ||
</data> | ||
<data name="output_table" format="tabular" from_work_dir="inference.tsv" label="${tool.name} on ${on_string}: Output estimate table" /> | ||
</outputs> | ||
<tests> | ||
<!-- Hint: You can use [ctrl+alt+t] after defining the inputs/outputs to auto-scaffold some basic test cases. --> | ||
|
||
<test expect_num_outputs="2"> | ||
<param name="input_anndata" value="pbmc3k_processed.h5ad"/> | ||
<param name="input_network_file" value="progeny_test.tsv"/> | ||
<param name="min_n" value="0"/> | ||
<param name="method" value="mlm"/> | ||
<param name="use_raw" value="false"/> | ||
<param name="write_activities_path" value="true"/> | ||
<param name="source" value="source"/> | ||
<param name="target" value="target"/> | ||
<param name="weight" value="weight"/> | ||
<output name="output_ad"> | ||
<assert_contents> | ||
<has_h5_keys keys="obsm/mlm_estimate"/> | ||
</assert_contents> | ||
</output> | ||
<output name="output_table"> | ||
<assert_contents> | ||
<has_n_columns n="5"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<!-- ULM run: both the activities AnnData and the estimates table are expected. -->
<test expect_num_outputs="2">
    <param name="input_anndata" value="pbmc3k_processed.h5ad"/>
    <param name="input_network_file" value="progeny_test_2.tsv"/>
    <param name="min_n" value="0"/>
    <param name="method" value="ulm"/>
    <param name="use_raw" value="false"/>
    <param name="write_activities_path" value="true"/>
    <param name="source" value="source"/>
    <param name="target" value="target"/>
    <param name="weight" value="weight"/>
    <output name="output_ad">
        <assert_contents>
            <has_h5_keys keys="obsm/ulm_estimate"/>
        </assert_contents>
    </output>
    <output name="output_table">
        <assert_contents>
            <has_n_columns n="5"/>
        </assert_contents>
    </output>
</test>
</tests> | ||
<help> | ||
**What it does** | ||
|
||
Usage | ||
..... | ||
|
||
|
||
**Description** | ||
|
||
This tool infers pathway activities using decoupler. | ||
|
||
**Input** | ||
|
||
The input file should be an AnnData object in H5AD format. The tool accepts an H5AD file containing raw or normalized data. | ||
|
||
The tool also takes a network file containing a collection of pathways and their target genes, with weights for each interaction. | ||
Example: | ||
``` | ||
source target weight | ||
0 T1 G01 1.0 | ||
1 T1 G02 1.0 | ||
2 T1 G03 0.7 | ||
3 T2 G04 1.0 | ||
4 T2 G06 -0.5 | ||
``` | ||
|
||
You can also specify whether to use the raw data in the AnnData object instead of the X matrix using the "use_raw" parameter, and set the minimum number of targets per source using "min_n". | ||
|
||
|
||
**Output** | ||
|
||
The tool outputs an AnnData object containing the activity scores in the "obsm" field (for example "mlm_estimate" or "ulm_estimate"), and a tab-separated text file containing the estimates and p-values for each cell. | ||
|
||
If the "write_activities_path" parameter is set to "true", the tool will write the activities AnnData object to an H5AD file; it is not a replacement for the AnnData provided as input. | ||
The tab-separated text file containing the scores for each cell is always written. | ||
|
||
|
||
|
||
</help> | ||
<citations> | ||
<citation type="doi">10.1093/bioadv/vbac016 </citation> | ||
</citations> | ||
</tool> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
71 changes: 71 additions & 0 deletions
71
tools/tertiary-analysis/decoupler/test-data/progeny_test.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
source target weight p_value | ||
0 Androgen TMPRSS2 11.490631 0.0 | ||
1 Androgen NKX3-1 10.622551 2.2e-44 | ||
2 Androgen MBOAT2 10.472733 4.6e-44 | ||
3 Androgen KLK2 10.176186 1.94441e-40 | ||
4 Androgen SARG 11.386852 2.79021e-40 | ||
5 EGFR LZTFL1 -1.8738769 2.0809955e-18 | ||
6 EGFR PHLDA2 3.5051384 2.0530624e-17 | ||
7 EGFR DUSP6 12.6293125 6.537324e-17 | ||
8 EGFR DUSP5 7.9430394 6.86669e-17 | ||
9 EGFR PHLDA1 6.619626 3.4106933e-16 | ||
10 Estrogen GREB1 17.240173 0.0 | ||
11 Estrogen RET 10.718027 0.0 | ||
12 Estrogen TFF1 14.430255 0.0 | ||
13 Estrogen HEY2 11.482369 3.1e-44 | ||
14 Estrogen RAPGEFL1 10.544896 5.2e-43 | ||
15 Hypoxia FAM162A 8.335551 0.0 | ||
16 Hypoxia NDRG1 22.08712 0.0 | ||
17 Hypoxia ENO2 14.32694 0.0 | ||
18 Hypoxia PDK1 13.120449 0.0 | ||
19 Hypoxia ANKRD37 8.484976 0.0 | ||
20 JAK-STAT OAS1 15.028714 1.058e-41 | ||
21 JAK-STAT HERC6 8.769676 1.3450407e-38 | ||
22 JAK-STAT OAS3 10.618842 1.2143582e-37 | ||
23 JAK-STAT PLSCR1 8.481604 8.955206e-37 | ||
24 JAK-STAT DDX60 12.198234 9.150971e-36 | ||
25 MAPK DUSP6 16.859016 0.0 | ||
26 MAPK SPRED2 3.5018346 0.0 | ||
27 MAPK SPRY2 9.481585 9.19e-43 | ||
28 MAPK ETV5 5.9887094 6.7425e-41 | ||
29 MAPK EPHA2 6.3140125 3.7492e-40 | ||
30 NFkB NFKB1 9.513637 0.0 | ||
31 NFkB CXCL3 22.946114 0.0 | ||
32 NFkB NFKB2 5.5155754 0.0 | ||
33 NFkB NFKBIA 11.444533 0.0 | ||
34 NFkB BCL2A1 14.416924 0.0 | ||
35 PI3K MLANA -9.985743 1.84e-43 | ||
36 PI3K PMEL -6.5903482 6.8747866e-36 | ||
37 PI3K FAXDC2 -12.421274 3.297515e-34 | ||
38 PI3K HSD17B8 -8.601571 9.948224e-34 | ||
39 PI3K CTSF -9.172143 1.0235212e-31 | ||
40 TGFb LINC00312 4.428987 2.0074443e-17 | ||
41 TGFb TSPAN2 5.502326 3.1451768e-16 | ||
42 TGFb SMAD7 7.6311436 7.3087106e-16 | ||
43 TGFb NOX4 5.913813 3.8292238e-15 | ||
44 TGFb COL4A1 6.3374896 9.052501e-15 | ||
45 TNFa CSF2 8.35548 0.0 | ||
46 TNFa CXCL5 10.0813675 0.0 | ||
47 TNFa NFKBIE 10.356205 0.0 | ||
48 TNFa TNFAIP3 35.40072 0.0 | ||
49 TNFa EFNA1 18.63111 0.0 | ||
50 Trail FRMPD1 -2.2346141 9.378505e-07 | ||
51 Trail WT1-AS 2.2251053 2.0316747e-06 | ||
52 Trail WNT8A -1.8469616 3.795469e-05 | ||
53 Trail GPR18 3.240805 6.1090715e-05 | ||
54 Trail TEC 2.0513217 6.32898e-05 | ||
55 VEGF CRACD -4.87119 6.7185365e-25 | ||
56 VEGF VWA8 -3.6068044 1.4495265e-18 | ||
57 VEGF NLGN1 -5.618075 2.6587072e-18 | ||
58 VEGF NRG3 -5.823747 1.0848074e-16 | ||
59 VEGF KCNK10 2.8833063 1.8129868e-16 | ||
60 WNT BMP4 5.936831 2.511717e-10 | ||
61 WNT SIGLEC6 2.0207362 2.347858e-09 | ||
62 WNT NPY2R 1.3872339 8.666917e-09 | ||
63 WNT CSF3R 1.9323153 3.0219417e-07 | ||
64 WNT KRT23 4.1216116 5.463989e-07 | ||
65 p53 GLS2 6.452465 7.444302e-37 | ||
66 p53 MDM2 8.193488 2.1194304e-35 | ||
67 p53 ZNF79 4.020263 4.5987433e-34 | ||
68 p53 FDXR 11.994496 5.589482e-32 | ||
69 p53 LCE1B 11.813737 7.8095406e-30 |
Oops, something went wrong.