Skip to content

Commit

Permalink
Merge pull request #272 from microbiomedata/252-workflow-automation-berkley-rc1
Browse files Browse the repository at this point in the history

252 workflow automation berkley rc1
  • Loading branch information
aclum authored Oct 15, 2024
2 parents c003242 + 74a6c92 commit 495142f
Show file tree
Hide file tree
Showing 97 changed files with 8,285 additions and 3,169 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ attic
private
configs/.local_*
attic/data/dryrun_data/
nmdc_automation/workflow_automation/_state/*.state
nmdc_automation/workflow_automation/_state/*.json

# Ignore `coverage.xml` file in this directory.
/coverage.xml
100 changes: 50 additions & 50 deletions configs/import-mt.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
Workflows:
- Name: Metatranscriptome Reads QC
Import: true
Type: nmdc:ReadQcAnalysisActivity
Type: nmdc:ReadQcAnalysis
Git_repo: https://github.com/microbiomedata/metaT_ReadsQC
Version: v0.0.7
Collection: read_qc_analysis_activity_set
ActivityRange: ReadQcAnalysisActivity
Collection: workflow_execution_set
WorkflowExecutionRange: ReadQcAnalysis
Inputs:
- Metagenome Raw Reads
Activity:
name: "Read QC Activity for {id}"
Workflow_Execution:
name: "Read QC for {id}"
input_read_bases: "{outputs.stats.input_read_bases}"
input_read_count: "{outputs.stats.input_read_count}"
output_read_bases: "{outputs.stats.output_read_bases}"
output_read_count: "{outputs.stats.output_read_count}"
type: nmdc:ReadQcAnalysisActivity
type: nmdc:ReadQcAnalysis
Outputs:
- Filtered Sequencing Reads
- QC Statistics
Expand All @@ -26,12 +26,12 @@ Workflows:
Type: nmdc:MetatranscriptomeAssembly
Git_repo: https://github.com/microbiomedata/metaT_Assembly
Version: v0.0.2
Collection: metatranscriptome_assembly_set
ActivityRange: MetatranscriptomeAssembly
Collection: workflow_execution_set
WorkflowExecutionRange: MetatranscriptomeAssembly
Inputs:
- Filtered Sequencing Reads
Activity:
name: "Metagenome Assembly Activity for {id}"
Workflow_Execution:
name: "Metagenome Assembly for {id}"
type: nmdc:MetatranscriptomeAssembly
asm_score: "{outputs.stats.asm_score}"
contig_bp: "{outputs.stats.contig_bp}"
Expand Down Expand Up @@ -67,16 +67,16 @@ Workflows:

- Name: Metatranscriptome Annotation
Import: false
Type: nmdc:MetatranscriptomeAnnotationActivity
Type: nmdc:MetatranscriptomeAnnotation
Git_repo: https://github.com/microbiomedata/mg_annotation
Version: v1.1.4
Collection: metatranscriptome_annotation_set
ActivityRange: MetatranscriptomeAnnotationActivity
Collection: workflow_execution_set
WorkflowExecutionRange: MetatranscriptomeAnnotation
Inputs:
- Assembly Contigs
Activity:
name: "Metatranscriptome Annotation Analysis Activity for {id}"
type: nmdc:MetatranscriptomeAnnotationActivity
Workflow_Execution:
name: "Metatranscriptome Annotation Analysis for {id}"
type: nmdc:MetatranscriptomeAnnotation
Outputs:
- Annotation Amino Acid FASTA
- Structural Annotation GFF
Expand Down Expand Up @@ -111,12 +111,12 @@ Workflows:
Git_repo: https://github.com/microbiomedata/metaT_ReadCounts
Version: v0.0.5
Collection: metatranscriptome_expression_analysis_set
ActivityRange: MetatranscriptomeExpressionAnalysis
WorkflowExecutionRange: MetatranscriptomeExpressionAnalysis
Inputs:
- Functional Annotation GFF
- Contig Mapping File
- Assembly Coverage BAM
Activity:
Workflow_Execution:
name: "Metatranscriptome Expression Analysis for {id}"
type: nmdc:MetatranscriptomeExpressionAnalysis
Outputs:
Expand All @@ -130,8 +130,8 @@ Data Objects:
name: Raw sequencer read data
import_suffix: .[A-Z]+-[A-Z]+.fastq.gz
nmdc_suffix: .fastq.gz
input_to: [nmdc:ReadQcAnalysisActivity]
output_of: nmdc:OmicsProcessing
input_to: [nmdc:ReadQcAnalysis]
output_of: nmdc:NucleotideSequencing
mulitple: false
action: none
- data_object_type: Annotation Amino Acid FASTA
Expand All @@ -140,7 +140,7 @@ Data Objects:
import_suffix: _proteins.faa
nmdc_suffix: _proteins.faa
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Contig Mapping File
Expand All @@ -149,7 +149,7 @@ Data Objects:
import_suffix: _contig_names_mapping.tsv
nmdc_suffix: _contig_names_mapping.tsv
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Structural Annotation GFF
Expand All @@ -158,7 +158,7 @@ Data Objects:
import_suffix: _structural_annotation.gff
nmdc_suffix: _structural_annotation.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Functional Annotation GFF
Expand All @@ -167,7 +167,7 @@ Data Objects:
import_suffix: _functional_annotation.gff
nmdc_suffix: _functional_annotation.gff
input_to: [nmdc:MetatranscriptomeExpressionAnalysis]
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Annotation KEGG Orthology
Expand All @@ -176,7 +176,7 @@ Data Objects:
import_suffix: _ko.tsv
nmdc_suffix: _ko.tsv
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Annotation Enzyme Commission
Expand All @@ -185,7 +185,7 @@ Data Objects:
import_suffix: _ec.tsv
nmdc_suffix: _ec.tsv
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Scaffold Lineage tsv
Expand All @@ -194,15 +194,15 @@ Data Objects:
import_suffix: _scaffold_lineage.tsv
nmdc_suffix: _scaffold_lineage.tsv
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
- data_object_type: Clusters of Orthologous Groups (COG) Annotation GFF
description: COGs for {id}
name: GFF3 format file with COGs
import_suffix: _cog.gff
nmdc_suffix: _cog.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Pfam Annotation GFF
Expand All @@ -211,7 +211,7 @@ Data Objects:
import_suffix: _pfam.gff
nmdc_suffix: _pfam.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: TIGRFam Annotation GFF
Expand All @@ -220,7 +220,7 @@ Data Objects:
import_suffix: _tigrfam.gff
nmdc_suffix: _tigrfam.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: SMART Annotation GFF
Expand All @@ -229,7 +229,7 @@ Data Objects:
import_suffix: _smart.gff
nmdc_suffix: _smart.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: SUPERFam Annotation GFF
Expand All @@ -238,7 +238,7 @@ Data Objects:
import_suffix: _supfam.gff
nmdc_suffix: _supfam.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: CATH FunFams (Functional Families) Annotation GFF
Expand All @@ -247,7 +247,7 @@ Data Objects:
import_suffix: _cath_funfam.gff
nmdc_suffix: _cath_funfam.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: CRT Annotation GFF
Expand All @@ -256,7 +256,7 @@ Data Objects:
import_suffix: _crt.gff
nmdc_suffix: _crt.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Genemark Annotation GFF
Expand All @@ -265,7 +265,7 @@ Data Objects:
import_suffix: _genemark.gff
nmdc_suffix: _genemark.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Prodigal Annotation GFF
Expand All @@ -274,7 +274,7 @@ Data Objects:
import_suffix: _prodigal.gff
nmdc_suffix: _prodigal.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: TRNA Annotation GFF
Expand All @@ -283,7 +283,7 @@ Data Objects:
import_suffix: _trna.gff
nmdc_suffix: _trna.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: RFAM Annotation GFF
Expand All @@ -292,7 +292,7 @@ Data Objects:
import_suffix: _rfam.gff
nmdc_suffix: _rfam.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: KO_EC Annotation GFF
Expand All @@ -301,7 +301,7 @@ Data Objects:
import_suffix: _ko_ec.gff
nmdc_suffix: _ko_ec.gff
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Product Names
Expand All @@ -310,7 +310,7 @@ Data Objects:
import_suffix: _product_names.tsv
nmdc_suffix: _product_names.tsv
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Gene Phylogeny tsv
Expand All @@ -319,7 +319,7 @@ Data Objects:
import_suffix: _gene_phylogeny.tsv
nmdc_suffix: _gene_phylogeny.tsv
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Crispr Terms
Expand All @@ -328,7 +328,7 @@ Data Objects:
import_suffix: _crt.crisprs
nmdc_suffix: _crt.crisprs
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Annotation Statistics
Expand All @@ -337,7 +337,7 @@ Data Objects:
import_suffix: _stats.tsv
nmdc_suffix: _stats.tsv
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Annotation Info File
Expand All @@ -346,23 +346,23 @@ Data Objects:
import_suffix: _imgap.info
nmdc_suffix: _imgap.info
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
action: rename
- data_object_type: Assembly Contigs
description: Assembly contigs (remapped) for {id}
import_suffix: _contigs.fna
nmdc_suffix: _renamed_contigs.fna
input_to: []
output_of: nmdc:MetatranscriptomeAnnotationActivity
output_of: nmdc:MetatranscriptomeAnnotation
mulitple: false
- data_object_type: Filtered Sequencing Reads
description: Reads QC for {id}
name: Reads QC result fastq (clean data)
import_suffix: filter-MTF.fastq.gz
nmdc_suffix: _filtered.fastq.gz
input_to: [nmdc:MetatranscriptomeAssembly]
output_of: nmdc:ReadQcAnalysisActivity
output_of: nmdc:ReadQcAnalysis
mulitple: false
action: rename
- data_object_type: rRNA Filtered Sequencing Reads
Expand All @@ -371,7 +371,7 @@ Data Objects:
import_suffix: .rRNA.fastq.gz
nmdc_suffix: _rRNA.fastq.gz
input_to: []
output_of: nmdc:ReadQcAnalysisActivity
output_of: nmdc:ReadQcAnalysis
mulitple: false
action: rename
- data_object_type: QC Statistics
Expand All @@ -380,7 +380,7 @@ Data Objects:
import_suffix: .filtered-report.txt
nmdc_suffix: _filterStats.txt
input_to: []
output_of: nmdc:ReadQcAnalysisActivity
output_of: nmdc:ReadQcAnalysis
mulitple: false
action: rename
- data_object_type: Read Filtering Info File
Expand All @@ -389,15 +389,15 @@ Data Objects:
import_suffix: .filter_cmd-MTF.sh
nmdc_suffix: _readsQC.info
input_to: []
output_of: nmdc:ReadQcAnalysisActivity
output_of: nmdc:ReadQcAnalysis
mulitple: false
action: rename
- data_object_type: Assembly Contigs
description: Assembly contigs for {id}
name: Final assembly contigs fasta
import_suffix: assembly.contigs.fasta
nmdc_suffix: _contigs.fna
input_to: [nmdc:MetatranscriptomeAnnotationActivity]
input_to: [nmdc:MetatranscriptomeAnnotation]
output_of: nmdc:MetatranscriptomeAssembly
mulitple: false
action: rename
Expand Down
Loading

0 comments on commit 495142f

Please sign in to comment.