-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSnakefile-recalculations
94 lines (82 loc) · 4.9 KB
/
Snakefile-recalculations
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
include: "Snakefile"
include: "Snakefile-proteomics"
experiment_type = get_from_config_or_metadata_summary('experiment_type')

# Proteomics experiments may need recalculating when bioentities change
# with an Ensembl update, but this workflow does not implement that path:
# report it and stop with a non-zero exit status.
if experiment_type in ('proteomics_baseline',
                       'proteomics_baseline_dia',
                       'proteomics_differential'):
    print(f"Recalculations not currently implemented for proteomics experiment: {experiment_type}")
    print("set goal='reprocess' in the main script to reprocess")
    sys.exit(1)
def get_outputs():
    """
    Build the list of target files for this run.

    First method to be executed since it is run by rule all.

    The selection is driven by config['tool'] and config['accession'].
    For every tool an explicitly named tool is always scheduled, while the
    bundle values (for example "all-diff" or "all-baseline") are scheduled
    only when skip(accession, step) is truthy.

    Side effects:
        * sets the module globals ``skip_accession`` and
          ``run_deconv_accession``, and ``metrics`` when a baseline tool
          is requested;
        * calls read_metadata_summary();
        * prints progress messages and the final list to stdout.

    Returns:
        list: output file paths to be requested as workflow targets.

    Raises:
        KeyError: if config has no 'accession' entry.
        OSError: from os.path.getsize() when an expression file checked
            for the co-expression step does not exist.
    """
    import os.path
    import datetime

    global metrics, skip_accession, run_deconv_accession

    print('Starting getting list of outputs..' + str(datetime.datetime.now()))

    # Validate before the first config['tool'] lookup; previously this ran
    # only after config['tool'] had already been dereferenced, so a missing
    # 'tool' surfaced as a bare KeyError instead of the helper's report.
    check_config_required(fields=['tool'])

    accession = config['accession']
    tool = config['tool']
    outputs = []

    # get metrics only for baseline
    if (tool == "all-baseline"
            or 'baseline-tracks' in tool
            or 'baseline-heatmap' in tool
            or 'baseline-coexpression' in tool):
        metrics = get_metrics_recalculations()

    # Read this now so that it is available for all other needs
    read_metadata_summary()
    skip_accession = read_skip_steps_file()
    run_deconv_accession = read_run_deconv_file()

    # NOTE(review): the returned experiment type was never used inside this
    # function; the call is kept in case the helper validates or caches
    # something - confirm and drop if it is a pure lookup.
    get_from_config_or_metadata_summary('experiment_type')

    # In the conditions below the parentheses only spell out the grouping
    # Python already applies (`and` binds tighter than `or`).
    # NOTE(review): step names passed to skip() mix underscores and hyphens
    # ('percentile_ranks', 'atlas_experiment_summary' vs 'baseline-tracks');
    # verify they match what skip() expects.

    if 'percentile-ranks' in tool or (tool == "all-diff" and skip(accession, 'percentile_ranks')):
        outputs.append(f"{accession}-percentile-ranks.tsv")

    if 'differential-tracks' in tool or (tool == "all-diff" and skip(accession, 'differential-tracks')):
        check_config_required(fields=['metadata_summary'], method='differential-tracks')
        # fake elements to mix contrasts labels and ids
        outputs.extend(expand(accession + ".{id}.{type}",
                              id=get_contrast_ids(),
                              type=["genes.pval.bedGraph", "genes.log2foldchange.bedGraph"]))

    if 'baseline-tracks' in tool or (tool == "all-baseline" and skip(accession, 'baseline-tracks')):
        check_config_required(fields=['metadata_summary'], method='baseline-tracks')
        # combine metric (fpkm / tpm) with assay_id/assay_label (zip based)
        # in a product manner
        outputs.extend(expand(accession + ".{a_id}.genes.expressions_{metric}.bedGraph",
                              a_id=get_assay_ids(),
                              metric=metrics))

    if 'differential-gsea' in tool or (tool == "all-diff" and skip(accession, 'differential-gsea')):
        check_config_required(fields=['bioentities_properties'], method='differential-gsea')
        outputs.extend(
            expand(accession + ".{c_id}.{ext_db}.{type}",
                   c_id=get_contrast_ids(),
                   ext_db=get_ext_db(),
                   type=["gsea.tsv", "gsea_list.tsv"]))
        outputs.extend(
            expand("logs/" + accession + ".{c_id}.{ext_db}.{type}",
                   c_id=get_contrast_ids(),
                   ext_db=get_ext_db(),
                   type=["check_differential_gsea.done", "check_differential_gsea_list.done"]))

    if 'atlas-experiment-summary' in tool or ('all' in tool and skip(accession, 'atlas_experiment_summary')):
        outputs.append(f"{accession}-atlasExperimentSummary.Rdata")

    if 'baseline-heatmap' in tool or ('all-baseline' in tool and skip(accession, 'baseline-heatmap')):
        # Doubled braces leave a literal {metric} wildcard for expand().
        outputs.extend(expand(f"{accession}-heatmap-{{metric}}.pdf", metric=metrics))
        outputs.append(f"{accession}-heatmap.pdf")
        # add deconvolution results for some accessions
        # NOTE(review): nesting of this check under the heatmap branch was
        # reconstructed from a copy of the file without indentation -
        # confirm against the repository.
        if run_deconvolution(accession):
            outputs.append(f"{accession}-deconvolution.proportions.tsv")

    if 'baseline-coexpression' in tool or ('all-baseline' in tool and skip(accession, 'baseline-coexpression')):
        metric_link_coexp = False
        for m in metrics:
            expression_file = f"{accession}-{m}.tsv"
            print(f"Checking file size for {expression_file} and number of columns")
            # Only non-empty expression tables with more than 4 columns get
            # a per-metric co-expression target.
            if os.path.getsize(expression_file) > 0 and get_number_columns(expression_file) > 4:
                metric_link_coexp = True
                outputs.extend(expand(f"{accession}-{m}-coexpressions.tsv.gz"))
        # The metric-independent file is requested once if any metric qualified.
        if metric_link_coexp:
            outputs.extend(expand(f"{accession}-coexpressions.tsv.gz"))

    print(outputs)
    print('Getting list of outputs.. done')
    print(datetime.datetime.now())
    return outputs
# Default target rule: its inputs are whatever get_outputs() returns, so
# requesting `all` asks Snakemake to produce every file selected for this
# run. get_outputs() is called here, while the rule is being defined.
rule all:
    input:
        required_outputs=get_outputs()