diff --git a/.gitignore b/.gitignore index 4c54769e..4c454286 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ attic private configs/.local_* attic/data/dryrun_data/ +nmdc_automation/workflow_automation/_state/*.state +nmdc_automation/workflow_automation/_state/*.json # Ignore `coverage.xml` file in this directory. /coverage.xml diff --git a/configs/import-mt.yaml b/configs/import-mt.yaml index 16788c85..1d3eedba 100644 --- a/configs/import-mt.yaml +++ b/configs/import-mt.yaml @@ -1,20 +1,20 @@ Workflows: - Name: Metatranscriptome Reads QC Import: true - Type: nmdc:ReadQcAnalysisActivity + Type: nmdc:ReadQcAnalysis Git_repo: https://github.com/microbiomedata/metaT_ReadsQC Version: v0.0.7 - Collection: read_qc_analysis_activity_set - ActivityRange: ReadQcAnalysisActivity + Collection: workflow_execution_set + WorkflowExecutionRange: ReadQcAnalysis Inputs: - Metagenome Raw Reads - Activity: - name: "Read QC Activity for {id}" + Workflow_Execution: + name: "Read QC for {id}" input_read_bases: "{outputs.stats.input_read_bases}" input_read_count: "{outputs.stats.input_read_count}" output_read_bases: "{outputs.stats.output_read_bases}" output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity + type: nmdc:ReadQcAnalysis Outputs: - Filtered Sequencing Reads - QC Statistics @@ -26,12 +26,12 @@ Workflows: Type: nmdc:MetatranscriptomeAssembly Git_repo: https://github.com/microbiomedata/metaT_Assembly Version: v0.0.2 - Collection: metatranscriptome_assembly_set - ActivityRange: MetatranscriptomeAssembly + Collection: workflow_execution_set + WorkflowExecutionRange: MetatranscriptomeAssembly Inputs: - Filtered Sequencing Reads - Activity: - name: "Metagenome Assembly Activity for {id}" + Workflow_Execution: + name: "Metagenome Assembly for {id}" type: nmdc:MetatranscriptomeAssembly asm_score: "{outputs.stats.asm_score}" contig_bp: "{outputs.stats.contig_bp}" @@ -67,16 +67,16 @@ Workflows: - Name: Metatranscriptome Annotation Import: 
false - Type: nmdc:MetatranscriptomeAnnotationActivity + Type: nmdc:MetatranscriptomeAnnotation Git_repo: https://github.com/microbiomedata/mg_annotation Version: v1.1.4 - Collection: metatranscriptome_annotation_set - ActivityRange: MetatranscriptomeAnnotationActivity + Collection: workflow_execution_set + WorkflowExecutionRange: MetatranscriptomeAnnotation Inputs: - Assembly Contigs - Activity: - name: "Metatranscriptome Annotation Analysis Activity for {id}" - type: nmdc:MetatranscriptomeAnnotationActivity + Workflow_Execution: + name: "Metatranscriptome Annotation Analysis for {id}" + type: nmdc:MetatranscriptomeAnnotation Outputs: - Annotation Amino Acid FASTA - Structural Annotation GFF @@ -111,12 +111,12 @@ Workflows: Git_repo: https://github.com/microbiomedata/metaT_ReadCounts Version: v0.0.5 - Collection: metatranscriptome_expression_analysis_set - ActivityRange: MetatranscriptomeExpressionAnalysis + Collection: workflow_execution_set + WorkflowExecutionRange: MetatranscriptomeExpressionAnalysis Inputs: - Functional Annotation GFF - Contig Mapping File - Assembly Coverage BAM - Activity: + Workflow_Execution: name: "Metatranscriptome Expression Analysis for {id}" type: nmdc:MetatranscriptomeExpressionAnalysis Outputs: @@ -130,8 +130,8 @@ Data Objects: name: Raw sequencer read data import_suffix: .[A-Z]+-[A-Z]+.fastq.gz nmdc_suffix: .fastq.gz - input_to: [nmdc:ReadQcAnalysisActivity] - output_of: nmdc:OmicsProcessing + input_to: [nmdc:ReadQcAnalysis] + output_of: nmdc:NucleotideSequencing mulitple: false action: none - data_object_type: Annotation Amino Acid FASTA @@ -140,7 +140,7 @@ Data Objects: import_suffix: _proteins.faa nmdc_suffix: _proteins.faa input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Contig Mapping File @@ -149,7 +149,7 @@ Data Objects: import_suffix: _contig_names_mapping.tsv nmdc_suffix: _contig_names_mapping.tsv input_to: [] - output_of:
nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Structural Annotation GFF @@ -158,7 +158,7 @@ Data Objects: import_suffix: _structural_annotation.gff nmdc_suffix: _structural_annotation.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Functional Annotation GFF @@ -167,7 +167,7 @@ Data Objects: import_suffix: _functional_annotation.gff nmdc_suffix: _functional_annotation.gff input_to: [nmdc:MetatranscriptomeExpressionAnalysis] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Annotation KEGG Orthology @@ -176,7 +176,7 @@ Data Objects: import_suffix: _ko.tsv nmdc_suffix: _ko.tsv input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Annotation Enzyme Commission @@ -185,7 +185,7 @@ Data Objects: import_suffix: _ec.tsv nmdc_suffix: _ec.tsv input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - - data_object_type Scaffold Lineage tsv + - data_object_type: Scaffold Lineage tsv @@ -194,7 +194,7 @@ Data Objects: import_suffix: _scaffold_lineage.tsv nmdc_suffix: _scaffold_lineage.tsv input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false - data_object_type: Clusters of Orthologous Groups (COG) Annotation GFF description: COGs for {id} @@ -202,7 +202,7 @@ Data Objects: import_suffix: _cog.gff nmdc_suffix: _cog.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Pfam Annotation GFF @@ -211,7 +211,7 @@ Data Objects: import_suffix:
_pfam.gff nmdc_suffix: _pfam.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: TIGRFam Annotation GFF @@ -220,7 +220,7 @@ Data Objects: import_suffix: _tigrfam.gff nmdc_suffix: _tigrfam.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: SMART Annotation GFF @@ -229,7 +229,7 @@ Data Objects: import_suffix: _smart.gff nmdc_suffix: _smart.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: SUPERFam Annotation GFF @@ -238,7 +238,7 @@ Data Objects: import_suffix: _supfam.gff nmdc_suffix: _supfam.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: CATH FunFams (Functional Families) Annotation GFF @@ -247,7 +247,7 @@ Data Objects: import_suffix: _cath_funfam.gff nmdc_suffix: _cath_funfam.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: CRT Annotation GFF @@ -256,7 +256,7 @@ Data Objects: import_suffix: _crt.gff nmdc_suffix: _crt.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Genemark Annotation GFF @@ -265,7 +265,7 @@ Data Objects: import_suffix: _genemark.gff nmdc_suffix: _genemark.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Prodigal Annotation GFF @@ -274,7 +274,7 @@ Data Objects: import_suffix: _prodigal.gff nmdc_suffix: _prodigal.gff input_to: [] - 
output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: TRNA Annotation GFF @@ -283,7 +283,7 @@ Data Objects: import_suffix: _trna.gff nmdc_suffix: _trna.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: RFAM Annotation GFF @@ -292,7 +292,7 @@ Data Objects: import_suffix: _rfam.gff nmdc_suffix: _rfam.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: KO_EC Annotation GFF @@ -301,7 +301,7 @@ Data Objects: import_suffix: _ko_ec.gff nmdc_suffix: _ko_ec.gff input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Product Names @@ -310,7 +310,7 @@ Data Objects: import_suffix: _product_names.tsv nmdc_suffix: _product_names.tsv input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Gene Phylogeny tsv @@ -319,7 +319,7 @@ Data Objects: import_suffix: _gene_phylogeny.tsv nmdc_suffix: _gene_phylogeny.tsv input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Crispr Terms @@ -328,7 +328,7 @@ Data Objects: import_suffix: _crt.crisprs nmdc_suffix: _crt.crisprs input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Annotation Statistics @@ -337,7 +337,7 @@ Data Objects: import_suffix: _stats.tsv nmdc_suffix: _stats.tsv input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation 
mulitple: false action: rename - data_object_type: Annotation Info File @@ -346,7 +346,7 @@ Data Objects: import_suffix: _imgap.info nmdc_suffix: _imgap.info input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false action: rename - data_object_type: Assembly Contigs @@ -354,7 +354,7 @@ Data Objects: import_suffix: _contigs.fna nmdc_suffix: _renamed_contigs.fna input_to: [] - output_of: nmdc:MetatranscriptomeAnnotationActivity + output_of: nmdc:MetatranscriptomeAnnotation mulitple: false - data_object_type: Filtered Sequencing Reads description: Reads QC for {id} @@ -362,7 +362,7 @@ Data Objects: import_suffix: filter-MTF.fastq.gz nmdc_suffix: _filtered.fastq.gz input_to: [nmdc:MetatranscriptomeAssembly] - output_of: nmdc:ReadQcAnalysisActivity + output_of: nmdc:ReadQcAnalysis mulitple: false action: rename - data_object_type: rRNA Filtered Sequencing Reads @@ -371,7 +371,7 @@ Data Objects: import_suffix: .rRNA.fastq.gz nmdc_suffix: _rRNA.fastq.gz input_to: [] - output_of: nmdc:ReadQcAnalysisActivity + output_of: nmdc:ReadQcAnalysis mulitple: false action: rename - data_object_type: QC Statistics @@ -380,7 +380,7 @@ Data Objects: import_suffix: .filtered-report.txt nmdc_suffix: _filterStats.txt input_to: [] - output_of: nmdc:ReadQcAnalysisActivity + output_of: nmdc:ReadQcAnalysis mulitple: false action: rename - data_object_type: Read Filtering Info File @@ -389,7 +389,7 @@ Data Objects: import_suffix: .filter_cmd-MTF.sh nmdc_suffix: _readsQC.info input_to: [] - output_of: nmdc:ReadQcAnalysisActivity + output_of: nmdc:ReadQcAnalysis mulitple: false action: rename - data_object_type: Assembly Contigs @@ -397,7 +397,7 @@ Data Objects: name: Final assembly contigs fasta import_suffix: assembly.contigs.fasta nmdc_suffix: _contigs.fna - input_to: [nmdc:MetatranscriptomeAnnotationActivity] + input_to: [nmdc:MetatranscriptomeAnnotation] output_of: nmdc:MetatranscriptomeAssembly mulitple: false action: 
rename diff --git a/configs/import.yaml b/configs/import.yaml index 34832579..787bf0ea 100644 --- a/configs/import.yaml +++ b/configs/import.yaml @@ -1,36 +1,36 @@ Workflows: - Name: Reads QC Import: true - Type: nmdc:ReadQcAnalysisActivity + Type: nmdc:ReadQcAnalysis Git_repo: https://github.com/microbiomedata/ReadsQC - Version: v1.0.8 - Collection: read_qc_analysis_activity_set - ActivityRange: ReadQcAnalysisActivity + Version: v1.0.12 + Collection: workflow_execution_set + WorkflowExecutionRange: ReadQcAnalysis Inputs: - Metagenome Raw Reads - Activity: - name: "Read QC Activity for {id}" + Workflow_Execution: + name: "Read QC for {id}" input_read_bases: "{outputs.stats.input_read_bases}" input_read_count: "{outputs.stats.input_read_count}" output_read_bases: "{outputs.stats.output_read_bases}" output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity + type: nmdc:ReadQcAnalysis Outputs: - Filtered Sequencing Reads - QC Statistics - Name: Readbased Taxonomy Import: false - Type: nmdc:ReadBasedTaxonomyAnalysisActivity + Type: nmdc:ReadBasedTaxonomyAnalysis Git_repo: https://github.com/microbiomedata/ReadsQC Version: v1.0.5 - Collection: read_based_taxonomy_analysis_activity_set - ActivityRange: ReadBasedTaxonomyAnalysisActivity + Collection: workflow_execution_set + WorkflowExecutionRange: ReadBasedTaxonomyAnalysis Inputs: - Filtered Sequencing Reads - Activity: - name: Readbased Taxonomy Analysis Activity for {id} - type: nmdc:ReadBasedTaxonomyAnalysisActivity + Workflow_Execution: + name: Readbased Taxonomy Analysis for {id} + type: nmdc:ReadBasedTaxonomyAnalysis Outputs: - GOTTCHA2 Classification Report - GOTTCHA2 Report Full @@ -47,12 +47,12 @@ Workflows: Type: nmdc:MetagenomeAssembly Git_repo: https://github.com/microbiomedata/metaAssembly Version: v1.0.3 - Collection: metagenome_assembly_set - ActivityRange: MetagenomeAssembly + Collection: workflow_execution_set + WorkflowExecutionRange: MetagenomeAssembly Inputs: - 
Filtered Sequencing Reads - Activity: - name: "Metagenome Assembly Activity for {id}" + Workflow_Execution: + name: "Metagenome Assembly for {id}" type: nmdc:MetagenomeAssembly asm_score: "{outputs.stats.asm_score}" contig_bp: "{outputs.stats.contig_bp}" @@ -88,16 +88,16 @@ Workflows: - Name: Metagenome Annotation Import: false - Type: nmdc:MetagenomeAnnotationActivity + Type: nmdc:MetagenomeAnnotation Git_repo: https://github.com/microbiomedata/mg_annotation Version: v1.0.4 - Collection: metagenome_annotation_activity_set - ActivityRange: MetagenomeAnnotationActivity + Collection: workflow_execution_set + WorkflowExecutionRange: MetagenomeAnnotation Inputs: - Assembly Contigs - Activity: - name: "Metagenome Annotation Analysis Activity for {id}" - type: nmdc:MetagenomeAnnotationActivity + Workflow_Execution: + name: "Metagenome Annotation Analysis for {id}" + type: nmdc:MetagenomeAnnotation Outputs: - Annotation Amino Acid FASTA - Structural Annotation GFF @@ -124,11 +124,11 @@ Workflows: - Name: MAGs Import: false - Type: nmdc:MagsAnalysisActivity + Type: nmdc:MagsAnalysis Git_repo: https://github.com/microbiomedata/metaMAGs - Version: v1.0.6 - Collection: mags_activity_set - ActivityRange: MagsAnalysisActivity + Version: v1.3.11 + Collection: workflow_execution_set + WorkflowExecutionRange: MagsAnalysis Inputs: - Assembly Contigs - Functional Annotation GFF @@ -144,9 +144,9 @@ Workflows: - SMART Annotation GFF - Annotation Amino Acid FASTA - Gene Phylogeny tsv - Activity: - name: "Metagenome Assembled Genomes Analysis Activity for {id}" - type: nmdc:MagsAnalysisActivity + Workflow_Execution: + name: "Metagenome Assembled Genomes Analysis for {id}" + type: nmdc:MagsAnalysis Outputs: - CheckM Statistics - Metagenome Bins @@ -160,8 +160,8 @@ Data Objects: name: Raw sequencer read data import_suffix: .[A-Z]+-[A-Z]+.fastq.gz nmdc_suffix: .fastq.gz - input_to: [nmdc:ReadQcAnalysisActivity] - output_of: nmdc:OmicsProcessing + input_to: [nmdc:ReadQcAnalysis] + 
output_of: nmdc:NucleotideSequencing mulitple: false action: none - data_object_type: CheckM Statistics @@ -170,7 +170,7 @@ Data Objects: import_suffix: _checkm_qa.out nmdc_suffix: _checkm_qa.out input_to: [] - output_of: nmdc:MagsAnalysisActivity + output_of: nmdc:MagsAnalysis mulitple: false action: rename - data_object_type: GTDBTK Bacterial Summary @@ -179,7 +179,7 @@ Data Objects: import_suffix: _gtdbtk.bac122.summary.tsv nmdc_suffix: _gtdbtk.bac122.summary.tsv input_to: [] - output_of: nmdc:MagsAnalysisActivity + output_of: nmdc:MagsAnalysis mulitple: false action: rename - data_object_type: GTDBTK Archaeal Summary @@ -188,7 +188,7 @@ Data Objects: import_suffix: _gtdbtk.ar122.summary.tsv nmdc_suffix: _gtdbtk.ar122.summary.tsv input_to: [] - output_of: nmdc:MagsAnalysisActivity + output_of: nmdc:MagsAnalysis mulitple: false action: rename - data_object_type: Annotation Amino Acid FASTA @@ -196,8 +196,8 @@ Data Objects: name: FASTA amino acid file for annotated proteins import_suffix: _proteins.faa nmdc_suffix: _proteins.faa - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Contig Mapping File @@ -206,7 +206,7 @@ Data Objects: import_suffix: _contig_names_mapping.tsv nmdc_suffix: _contig_names_mapping.tsv input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Structural Annotation GFF @@ -215,7 +215,7 @@ Data Objects: import_suffix: _structural_annotation.gff nmdc_suffix: _structural_annotation.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Functional Annotation GFF @@ -223,8 +223,8 @@ Data Objects: name: GFF3 format file with functional annotations import_suffix: _functional_annotation.gff nmdc_suffix: 
_functional_annotation.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Annotation KEGG Orthology @@ -232,8 +232,8 @@ Data Objects: name: Tab delimited file for KO annotation import_suffix: _ko.tsv nmdc_suffix: _ko.tsv - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Annotation Enzyme Commission @@ -241,8 +241,8 @@ Data Objects: name: Tab delimited file for EC annotation import_suffix: _ec.tsv nmdc_suffix: _ec.tsv - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Clusters of Orthologous Groups (COG) Annotation GFF @@ -250,8 +250,8 @@ Data Objects: name: GFF3 format file with COGs import_suffix: _cog.gff nmdc_suffix: _cog.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Pfam Annotation GFF @@ -259,8 +259,8 @@ Data Objects: name: GFF3 format file with Pfam import_suffix: _pfam.gff nmdc_suffix: _pfam.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: TIGRFam Annotation GFF @@ -268,8 +268,8 @@ Data Objects: name: GFF3 format file with TIGRfam import_suffix: _tigrfam.gff nmdc_suffix: _tigrfam.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: 
rename - data_object_type: SMART Annotation GFF @@ -277,8 +277,8 @@ Data Objects: name: GFF3 format file with SMART import_suffix: _smart.gff nmdc_suffix: _smart.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: SUPERFam Annotation GFF @@ -286,8 +286,8 @@ Data Objects: name: GFF3 format file with SUPERFam import_suffix: _supfam.gff nmdc_suffix: _supfam.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: CATH FunFams (Functional Families) Annotation GFF @@ -295,8 +295,8 @@ Data Objects: name: GFF3 format file with CATH FunFams import_suffix: _cath_funfam.gff nmdc_suffix: _cath_funfam.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: CRT Annotation GFF @@ -305,7 +305,7 @@ Data Objects: import_suffix: _crt.gff nmdc_suffix: _crt.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Genemark Annotation GFF @@ -314,7 +314,7 @@ Data Objects: import_suffix: _genemark.gff nmdc_suffix: _genemark.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Prodigal Annotation GFF @@ -323,7 +323,7 @@ Data Objects: import_suffix: _prodigal.gff nmdc_suffix: _prodigal.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: TRNA Annotation GFF @@ -332,7 +332,7 @@ Data Objects: import_suffix: _trna.gff nmdc_suffix: 
_trna.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: RFAM Annotation GFF @@ -341,7 +341,7 @@ Data Objects: import_suffix: _rfam.gff nmdc_suffix: _rfam.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: KO_EC Annotation GFF @@ -350,7 +350,7 @@ Data Objects: import_suffix: _ko_ec.gff nmdc_suffix: _ko_ec.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Product Names @@ -358,8 +358,8 @@ Data Objects: name: Product names file import_suffix: _product_names.tsv nmdc_suffix: _product_names.tsv - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Gene Phylogeny tsv @@ -367,8 +367,8 @@ Data Objects: name: Gene Phylogeny file import_suffix: _gene_phylogeny.tsv nmdc_suffix: _gene_phylogeny.tsv - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Crispr Terms @@ -377,7 +377,7 @@ Data Objects: import_suffix: _crt.crisprs nmdc_suffix: _crt.crisprs input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Annotation Statistics @@ -386,7 +386,7 @@ Data Objects: import_suffix: _stats.tsv nmdc_suffix: _stats.tsv input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Annotation Info File @@ -395,7 +395,7 @@ Data Objects: import_suffix: _imgap.info nmdc_suffix: _imgap.info input_to: [] - 
output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Filtered Sequencing Reads @@ -403,8 +403,8 @@ Data Objects: name: Reads QC result fastq (clean data) import_suffix: filter-METAGENOME.fastq.gz nmdc_suffix: _filtered.fastq.gz - input_to: [nmdc:ReadBasedTaxonomyAnalysisActivity,nmdc:MetagenomeAssembly] - output_of: nmdc:ReadQcAnalysisActivity + input_to: [nmdc:ReadBasedTaxonomyAnalysis,nmdc:MetagenomeAssembly] + output_of: nmdc:ReadQcAnalysis mulitple: false action: rename - data_object_type: QC Statistics @@ -413,7 +413,7 @@ Data Objects: import_suffix: .filtered-report.txt nmdc_suffix: _filterStats.txt input_to: [] - output_of: nmdc:ReadQcAnalysisActivity + output_of: nmdc:ReadQcAnalysis mulitple: false action: rename - data_object_type: Read Filtering Info File @@ -422,7 +422,7 @@ Data Objects: import_suffix: _readsQC.info nmdc_suffix: _readsQC.info input_to: [] - output_of: nmdc:ReadQcAnalysisActivity + output_of: nmdc:ReadQcAnalysis mulitple: false action: rename - data_object_type: Assembly Contigs @@ -430,7 +430,7 @@ Data Objects: name: Final assembly contigs fasta import_suffix: assembly.contigs.fasta nmdc_suffix: _contigs.fna - input_to: [nmdc:MetagenomeAnnotationActivity,nmdc:MagsAnalysisActivity] + input_to: [nmdc:MetagenomeAnnotation,nmdc:MagsAnalysis] output_of: nmdc:MetagenomeAssembly mulitple: false action: rename @@ -475,7 +475,7 @@ Data Objects: name: Sorted bam file of reads mapping back to the final assembly import_suffix: pairedMapped.sam.gz nmdc_suffix: _pairedMapped_sorted.sam.gz - input_to: [nmdc:MagsAnalysisActivity] + input_to: [nmdc:MagsAnalysis] output_of: nmdc:MetagenomeAssembly mulitple: false action: rename @@ -485,7 +485,7 @@ Data Objects: import_suffix: _gottcha2_full.tsv nmdc_suffix: _gottcha2_full.tsv input_to: [] - output_of: nmdc:ReadBasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - 
data_object_type: GOTTCHA2 Classification Report @@ -494,7 +494,7 @@ Data Objects: import_suffix: _gottcha2_classification.tsv nmdc_suffix: _gottcha2_classification.tsv input_to: [] - output_of: nmdc:ReadBasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: GOTTCHA2 Krona Plot @@ -503,7 +503,7 @@ Data Objects: import_suffix: _gottcha2_krona.html nmdc_suffix: _gottcha2_krona.html input_to: [] - output_of: nmdc:ReadBasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Centrifuge Taxonomic Classification @@ -512,7 +512,7 @@ Data Objects: import_suffix: _centrifuge_classification.tsv nmdc_suffix: _centrifuge_classification.tsv input_to: [] - output_of: nmdc:ReadBasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Centrifuge output report file @@ -521,7 +521,7 @@ Data Objects: import_suffix: _centrifuge_report.tsv nmdc_suffix: _centrifuge_report.tsv input_to: [] - output_of: nmdc:ReadbasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Centrifuge Krona Plot @@ -530,7 +530,7 @@ Data Objects: import_suffix: _centrifuge_krona.html nmdc_suffix: _centrifuge_krona.html input_to: [] - output_of: nmdc:ReadbasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Kraken2 Classification Report @@ -539,7 +539,7 @@ Data Objects: import_suffix: _kraken2_report.tsv nmdc_suffix: _kraken2_report.tsv input_to: [] - output_of: nmdc:ReadbasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Kraken2 Taxonomic Classification @@ -548,7 +548,7 @@ Data Objects: import_suffix: _kraken2_classification.tsv nmdc_suffix: _kraken2_classification.tsv input_to: [] - output_of: 
nmdc:ReadbasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Kraken2 Krona Plot @@ -557,7 +557,7 @@ Data Objects: import_suffix: _kraken2_krona.html nmdc_suffix: _kraken2_krona.html input_to: [] - output_of: nmdc:ReadbasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename Multiples: @@ -567,7 +567,7 @@ Data Objects: import_suffix: _[0-9]+.tar.gz nmdc_suffix: _hqmq_bin.zip input_to: [] - output_of: nmdc:MagsAnalysisActivity + output_of: nmdc:MagsAnalysis mulitple: true action: zip diff --git a/nmdc_automation/__init__.py b/nmdc_automation/__init__.py index 2b16126a..0dd591c9 100644 --- a/nmdc_automation/__init__.py +++ b/nmdc_automation/__init__.py @@ -1,4 +1,4 @@ from .api import nmdcapi -from .config import config +from .config import siteconfig from .import_automation import activity_mapper -from .workflow_automation import watch_nmdc, wfutils, workflows, activities +from .workflow_automation import watch_nmdc, wfutils, workflows, workflow_process diff --git a/nmdc_automation/api/jawsapi.py b/nmdc_automation/api/jawsapi.py index 67e29777..0293893b 100644 --- a/nmdc_automation/api/jawsapi.py +++ b/nmdc_automation/api/jawsapi.py @@ -2,7 +2,7 @@ import requests import uuid -from nmdc_automation.config import Config +from nmdc_automation.config import SiteConfig _base_url = "http://jaws.lbl.gov:5003/api/v2" _base_in = "/pscratch/sd/n/nmjaws/nmdc-prod/inputs" @@ -14,7 +14,7 @@ class JawsApi: def __init__(self, site_configuration): - self.config = Config(site_configuration) + self.config = SiteConfig(site_configuration) self._base_url = self.config.api_url self.client_id = self.config.client_id self.client_secret = self.config.client_secret diff --git a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index 3e83a01f..15efb92c 100755 --- a/nmdc_automation/api/nmdcapi.py +++ b/nmdc_automation/api/nmdcapi.py @@ -10,26 +10,30 @@ import 
mimetypes from pathlib import Path from time import time -from typing import Union +from typing import Union, List from datetime import datetime, timedelta, timezone -from nmdc_automation.config import Config, UserConfig +from nmdc_automation.config import SiteConfig, UserConfig import logging -def _get_sha256(fn): - hashfn = fn + ".sha256" - if os.path.exists(hashfn): - with open(hashfn) as f: +def _get_sha256(fn: Union[str, Path]) -> str: + """ + Return the sha256 hex digest of a file, reusing the cached "<filename>.sha256" sidecar file when it exists and writing one otherwise. + """ + shahash = hashlib.sha256() + if isinstance(fn, str): + fn = Path(fn) + hash_fn = fn.with_name(fn.name + ".sha256") # append the suffix; with_suffix() would replace the last extension (e.g. ".gz") + if hash_fn.exists(): + with hash_fn.open() as f: sha = f.read().rstrip() else: - logging.info("hashing %s" % (fn)) - shahash = hashlib.sha256() - with open(fn, "rb") as f: - # Read and update hash string value in blocks of 4K + logging.info(f"hashing {fn}") + with fn.open("rb") as f: for byte_block in iter(lambda: f.read(1048576), b""): shahash.update(byte_block) sha = shahash.hexdigest() - with open(hashfn, "w") as f: + with hash_fn.open("w") as f: f.write(sha) f.write("\n") return sha @@ -46,8 +50,10 @@ class NmdcRuntimeApi: client_id = None client_secret = None - def __init__(self, site_configuration: Union[str, Path]): - self.config = Config(site_configuration) + def __init__(self, site_configuration: Union[str, Path, SiteConfig]): + if isinstance(site_configuration, (str, Path)): + site_configuration = SiteConfig(site_configuration) + self.config = site_configuration self._base_url = self.config.api_url self.client_id = self.config.client_id self.client_secret = self.config.client_secret @@ -184,7 +190,7 @@ def create_object(self, fn, description, dataurl): @refresh_token def post_objects(self, obj_data): - url = self._base_url + "workflows/activities" + url = self._base_url + "workflows/workflow_executions" resp = requests.post(url, headers=self.header, data=json.dumps(obj_data)) return resp.json() @@
-205,8 +211,9 @@ def bump_time(self, obj): resp = requests.patch(url, headers=self.header, data=json.dumps(d)) return resp.json() + # TODO test that this concatenates multi-page results @refresh_token - def list_jobs(self, filt=None, max=100): + def list_jobs(self, filt=None, max=100) -> List[dict]: url = "%sjobs?max_page_size=%s" % (self._base_url, max) d = {} if filt: @@ -316,6 +323,7 @@ def run_query(self, query): return resp.json() +# TODO - This is deprecated and should be removed along with the re_iding code that uses it class NmdcRuntimeUserApi: """ Basic Runtime API Client with user/password authentication. diff --git a/nmdc_automation/config/__init__.py b/nmdc_automation/config/__init__.py index aaee0ab5..9f38ee32 100644 --- a/nmdc_automation/config/__init__.py +++ b/nmdc_automation/config/__init__.py @@ -1 +1 @@ -from .config import Config, UserConfig +from .siteconfig import SiteConfig, UserConfig diff --git a/nmdc_automation/config/config.py b/nmdc_automation/config/siteconfig.py similarity index 90% rename from nmdc_automation/config/config.py rename to nmdc_automation/config/siteconfig.py index f6ef4d4e..a681d8fa 100644 --- a/nmdc_automation/config/config.py +++ b/nmdc_automation/config/siteconfig.py @@ -1,14 +1,18 @@ -from pathlib import Path import tomli from typing import Union import yaml -import os from pathlib import Path +import warnings WORKFLOWS_DIR = Path(__file__).parent / "workflows" class UserConfig: def __init__(self, path): + warnings.warn( + "UserConfig is deprecated and will be removed in a future release. 
Use SiteConfig instead.", + DeprecationWarning, + stacklevel=2, + ) with open(path, "rb") as file: self.config_data = tomli.load(file) @@ -24,7 +28,7 @@ def username(self): def password(self): return self.config_data["api"]["password"] -class Config: +class SiteConfig: def __init__(self, path: Union[str, Path]): with open(path, "rb") as file: self.config_data = tomli.load(file) @@ -75,7 +79,7 @@ def watch_state(self): @property def agent_state(self): - return self.config_data["state"]["agent_state"] + return self.config_data.get("state", {}).get("agent_state", None) @property def activity_id_state(self): diff --git a/nmdc_automation/config/workflows/workflows-mt.yaml b/nmdc_automation/config/workflows/workflows-mt.yaml index 930fb8c4..1c8c8751 100644 --- a/nmdc_automation/config/workflows/workflows-mt.yaml +++ b/nmdc_automation/config/workflows/workflows-mt.yaml @@ -1,6 +1,6 @@ Workflows: - Name: Sequencing Noninterleaved - Collection: omics_processing_set + Collection: data_generation_set Enabled: True Analyte Category: Metatranscriptome Filter Output Objects: @@ -8,20 +8,20 @@ Workflows: - Metagenome Raw Read 2 - Name: Sequencing Interleaved - Collection: omics_processing_set + Collection: data_generation_set Enabled: True Analyte Category: Metatranscriptome Filter Output Objects: - Metagenome Raw Reads - Name: Metatranscriptome Reads QC - Type: nmdc:ReadQcAnalysisActivity + Type: nmdc:ReadQcAnalysis Enabled: True Analyte Category: Metatranscriptome Git_repo: https://github.com/microbiomedata/metaT_ReadsQC Version: v0.0.7 WDL: rqcfilter.wdl - Collection: read_qc_analysis_activity_set + Collection: workflow_execution_set Filter Input Objects: - Metagenome Raw Reads Predecessors: @@ -30,14 +30,14 @@ Workflows: Input_prefix: nmdc_rqcfilter Inputs: input_files: do:Metagenome Raw Reads - proj: "{activity_id}" - Activity: - name: "Read QC Activity for {id}" + proj: "{workflow_execution_id}" + Workflow Execution: + name: "Read QC for {id}" input_read_bases: 
"{outputs.stats.input_read_bases}" input_read_count: "{outputs.stats.input_read_count}" output_read_bases: "{outputs.stats.output_read_bases}" output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity + type: nmdc:ReadQcAnalysis Outputs: - output: filtered_final name: Reads QC result fastq (clean data) @@ -57,16 +57,16 @@ Workflows: description: "rRNA fastq for {id}" - Name: Metatranscriptome Reads QC Interleave - Type: nmdc:ReadQcAnalysisActivity + Type: nmdc:ReadQcAnalysis Enabled: True Analyte Category: Metatranscriptome Git_repo: https://github.com/microbiomedata/metaT_ReadsQC Version: v0.0.7 - Collection: read_qc_analysis_activity_set + Collection: workflow_execution_set WDL: interleave_rqcfilter.wdl Input_prefix: nmdc_rqcfilter Inputs: - proj: "{activity_id}" + proj: "{workflow_execution_id}" input_fastq1: do:Metagenome Raw Read 1 input_fastq2: do:Metagenome Raw Read 2 Filter Input Objects: @@ -74,13 +74,13 @@ Workflows: - Metagenome Raw Read 2 Predecessors: - Sequencing Noninterleaved - Activity: - name: "Read QC Activity for {id}" + Workflow Execution: + name: "Read QC for {id}" input_read_bases: "{outputs.stats.input_read_bases}" input_read_count: "{outputs.stats.input_read_count}" output_read_bases: "{outputs.stats.output_read_bases}" output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity + type: nmdc:ReadQcAnalysis Outputs: - output: filtered_final name: Reads QC result fastq (clean data) @@ -106,16 +106,16 @@ Workflows: Git_repo: https://github.com/microbiomedata/metaT_Assembly Version: v0.0.2 WDL: metaT_assembly.wdl - Collection: metatranscriptome_assembly_set + Collection: workflow_execution_set Predecessors: - Metatranscriptome Reads QC - Metatranscriptome Reads QC Interleave Input_prefix: jgi_metaASM Inputs: input_files: do:Filtered Sequencing Reads - proj: "{activity_id}" - Activity: - name: "Metatranscriptome Assembly Activity for {id}" + proj: "{workflow_execution_id}" + 
Workflow Execution: + name: "Metatranscriptome Assembly for {id}" type: nmdc:MetatranscriptomeAssembly asm_score: "{outputs.stats.asm_score}" contig_bp: "{outputs.stats.contig_bp}" @@ -153,23 +153,23 @@ Workflows: description: "Alignment index file for {id}" - Name: Metatranscriptome Annotation - Type: nmdc:MetatranscriptomeAnnotationActivity + Type: nmdc:MetatranscriptomeAnnotation Enabled: True Analyte Category: Metatranscriptome Git_repo: https://github.com/microbiomedata/mg_annotation Version: v1.1.4 WDL: annotation_full.wdl - Collection: metatranscriptome_annotation_set + Collection: workflow_execution_set Predecessors: - Metatranscriptome Assembly Input_prefix: annotation Inputs: input_file: do:Assembly Contigs imgap_project_id: "scaffold" - proj: "{activity_id}" - Activity: - name: "Metatranscriptome Annotation Analysis Activity for {id}" - type: nmdc:MetatranscriptomeAnnotationActivity + proj: "{workflow_execution_id}" + Workflow Execution: + name: "Metatranscriptome Annotation Analysis for {id}" + type: nmdc:MetatranscriptomeAnnotation Outputs: - output: proteins_faa data_object_type: Annotation Amino Acid FASTA @@ -282,7 +282,7 @@ Workflows: Git_repo: https://github.com/microbiomedata/metaT_ReadCounts Version: v0.0.5 WDL: readcount.wdl - Collection: metatranscriptome_expression_analysis_set + Collection: workflow_execution_set Predecessors: - Metatranscriptome Annotation Input_prefix: nmdc_expression @@ -291,8 +291,8 @@ Workflows: map: do:Contig Mapping File bam: do:Assembly Coverage BAM rna_type: "aRNA" - proj: "{activity_id}" - Activity: + proj: "{workflow_execution_id}" + Workflow Execution: name: "Metatranscriptome Expression Analysis for {id}" type: nmdc:MetatranscriptomeExpressionAnalysis Outputs: @@ -312,7 +312,7 @@ Workflows: Git_repo: https://github.com/microbiomedata/metaT_ReadCounts Version: v0.0.5 WDL: readcount.wdl - Collection: metatranscriptome_expression_analysis_set + Collection: workflow_execution_set Predecessors: - Metatranscriptome 
Annotation Input_prefix: nmdc_expression @@ -320,8 +320,8 @@ Workflows: gff_file: do:Functional Annotation GFF map: do:Contig Mapping File bam: do:Assembly Coverage BAM - proj: "{activity_id}" - Activity: + proj: "{workflow_execution_id}" + Workflow Execution: name: "Metatranscriptome Expression Analysis for {id}" type: nmdc:MetatranscriptomeExpressionAnalysis Outputs: @@ -341,7 +341,7 @@ Workflows: Git_repo: https://github.com/microbiomedata/metaT_ReadCounts Version: v0.0.5 WDL: readcount.wdl - Collection: metatranscriptome_expression_analysis_set + Collection: workflow_execution_set Predecessors: - Metatranscriptome Annotation Input_prefix: nmdc_expression @@ -350,8 +350,8 @@ Workflows: map: do:Contig Mapping File bam: do:Assembly Coverage BAM rna_type: "non_stranded_RNA" - proj: "{activity_id}" - Activity: + proj: "{workflow_execution_id}" + Workflow Execution: name: "Metatranscriptome Expression Analysis for {id}" type: nmdc:MetatranscriptomeExpressionAnalysis Outputs: diff --git a/nmdc_automation/config/workflows/workflows.yaml b/nmdc_automation/config/workflows/workflows.yaml index 11bd1468..50dc4d8b 100644 --- a/nmdc_automation/config/workflows/workflows.yaml +++ b/nmdc_automation/config/workflows/workflows.yaml @@ -1,6 +1,6 @@ Workflows: - Name: Sequencing Noninterleaved - Collection: omics_processing_set + Collection: data_generation_set Enabled: True Analyte Category: Metagenome Filter Output Objects: @@ -8,36 +8,35 @@ Workflows: - Metagenome Raw Read 2 - Name: Sequencing Interleaved - Collection: omics_processing_set + Collection: data_generation_set Enabled: True Analyte Category: Metagenome Filter Output Objects: - Metagenome Raw Reads - Name: Reads QC - Type: nmdc:ReadQcAnalysisActivity + Type: nmdc:ReadQcAnalysis Enabled: True Analyte Category: Metagenome Git_repo: https://github.com/microbiomedata/ReadsQC - Version: v1.0.8 + Version: v1.0.12 WDL: rqcfilter.wdl - Collection: read_qc_analysis_activity_set + Collection: workflow_execution_set Filter 
Input Objects: - Metagenome Raw Reads Predecessors: - - Sequencing - Sequencing Interleaved Input_prefix: nmdc_rqcfilter Inputs: input_files: do:Metagenome Raw Reads - proj: "{activity_id}" - Activity: - name: "Read QC Activity for {id}" + proj: "{workflow_execution_id}" + Workflow Execution: + name: "Read QC for {id}" input_read_bases: "{outputs.stats.input_read_bases}" input_read_count: "{outputs.stats.input_read_count}" output_read_bases: "{outputs.stats.output_read_bases}" output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity + type: nmdc:ReadQcAnalysis Outputs: - output: filtered_final name: Reads QC result fastq (clean data) @@ -53,30 +52,30 @@ Workflows: description: "Read filtering info for {id}" - Name: Reads QC Interleave - Type: nmdc:ReadQcAnalysisActivity + Type: nmdc:ReadQcAnalysis Enabled: True Analyte Category: Metagenome Git_repo: https://github.com/microbiomedata/ReadsQC - Version: v1.0.8 - Collection: read_qc_analysis_activity_set + Version: v1.0.12 + Collection: workflow_execution_set WDL: interleave_rqcfilter.wdl Input_prefix: nmdc_rqcfilter Inputs: - proj: "{activity_id}" - input_fastq1: do:Metagenome Raw Read 1 - input_fastq2: do:Metagenome Raw Read 2 + proj: "{workflow_execution_id}" + input_fq1: do:Metagenome Raw Read 1 + input_fq2: do:Metagenome Raw Read 2 Filter Input Objects: - Metagenome Raw Read 1 - Metagenome Raw Read 2 Predecessors: - Sequencing Noninterleaved - Activity: - name: "Read QC Activity for {id}" + Workflow Execution: + name: "Read QC for {id}" input_read_bases: "{outputs.stats.input_read_bases}" input_read_count: "{outputs.stats.input_read_count}" output_read_bases: "{outputs.stats.output_read_bases}" output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity + type: nmdc:ReadQcAnalysis Outputs: - output: filtered_final name: Reads QC result fastq (clean data) @@ -98,17 +97,17 @@ Workflows: Git_repo: https://github.com/microbiomedata/metaAssembly Version: 
v1.0.3 WDL: jgi_assembly.wdl - Collection: metagenome_assembly_set + Collection: workflow_execution_set Predecessors: - Reads QC - Reads QC Interleave Input_prefix: jgi_metaASM Inputs: input_file: do:Filtered Sequencing Reads - rename_contig_prefix: "{activity_id}" - proj: "{activity_id}" - Activity: - name: "Metagenome Assembly Activity for {id}" + rename_contig_prefix: "{workflow_execution_id}" + proj: "{workflow_execution_id}" + Workflow Execution: + name: "Metagenome Assembly for {id}" type: nmdc:MetagenomeAssembly asm_score: "{outputs.stats.asm_score}" contig_bp: "{outputs.stats.contig_bp}" @@ -162,23 +161,23 @@ Workflows: description: "Assembly info for {id}" - Name: Metagenome Annotation - Type: nmdc:MetagenomeAnnotationActivity + Type: nmdc:MetagenomeAnnotation Enabled: True Analyte Category: Metagenome Git_repo: https://github.com/microbiomedata/mg_annotation Version: v1.1.0 WDL: annotation_full.wdl - Collection: metagenome_annotation_activity_set + Collection: workflow_execution_set Predecessors: - Metagenome Assembly Input_prefix: annotation Inputs: input_file: do:Assembly Contigs imgap_project_id: "scaffold" - proj: "{activity_id}" - Activity: - name: "Metagenome Annotation Analysis Activity for {id}" - type: nmdc:MetagenomeAnnotationActivity + proj: "{workflow_execution_id}" + Workflow Execution: + name: "Metagenome Annotation Analysis for {id}" + type: nmdc:MetagenomeAnnotation Outputs: - output: proteins_faa data_object_type: Annotation Amino Acid FASTA @@ -285,18 +284,18 @@ Workflows: - Name: MAGs - Type: nmdc:MagsAnalysisActivity + Type: nmdc:MagsAnalysis Enabled: True Analyte Category: Metagenome Git_repo: https://github.com/microbiomedata/metaMAGs - Version: v1.3.10 + Version: v1.3.11 WDL: mbin_nmdc.wdl - Collection: mags_activity_set + Collection: workflow_execution_set Predecessors: - Metagenome Annotation Input_prefix: nmdc_mags Inputs: - proj: "{activity_id}" + proj: "{workflow_execution_id}" contig_file: do:Assembly Contigs sam_file: 
do:Assembly Coverage BAM gff_file: do:Functional Annotation GFF @@ -312,10 +311,10 @@ Workflows: lineage_file: do:Scaffold Lineage tsv map_file: do:Contig Mapping File Optional Inputs: - - map_file - Activity: - name: "Metagenome Assembled Genomes Analysis Activity for {id}" - type: nmdc:MagsAnalysisActivity + - map_file + Workflow Execution: + name: "Metagenome Assembled Genomes Analysis for {id}" + type: nmdc:MagsAnalysis binned_contig_num: "{outputs.final_stats_json.binned_contig_num}" input_contig_num: "{outputs.final_stats_json.input_contig_num}" low_depth_contig_num: "{outputs.final_stats_json.low_depth_contig_num}" @@ -360,27 +359,25 @@ Workflows: data_object_type: Metagenome Bins Krona Plot description: Metagenome Bins Krona Plot for {id} name: Metagenome Krona Bins Plot File - - - Name: Readbased Analysis - Type: nmdc:ReadBasedTaxonomyAnalysisActivity + Type: nmdc:ReadBasedTaxonomyAnalysis Enabled: True Analyte Category: Metagenome Git_repo: https://github.com/microbiomedata/ReadbasedAnalysis Version: v1.0.5 WDL: ReadbasedAnalysis.wdl - Collection: read_based_taxonomy_analysis_activity_set + Collection: workflow_execution_set Predecessors: - Reads QC - Reads QC Interleave Input_prefix: ReadbasedAnalysis Inputs: input_file: do:Filtered Sequencing Reads - proj: "{activity_id}" - Activity: - name: "Readbased Taxonomy Analysis Activity for {id}" - type: nmdc:ReadBasedTaxonomyAnalysisActivity + proj: "{workflow_execution_id}" + Workflow Execution: + name: "Readbased Taxonomy Analysis for {id}" + type: nmdc:ReadBasedTaxonomyAnalysis Outputs: - output: final_gottcha2_report_tsv data_object_type: GOTTCHA2 Classification Report diff --git a/nmdc_automation/import_automation/activity_mapper.py b/nmdc_automation/import_automation/activity_mapper.py index f7e72dd0..1226855e 100644 --- a/nmdc_automation/import_automation/activity_mapper.py +++ b/nmdc_automation/import_automation/activity_mapper.py @@ -6,8 +6,11 @@ import pytz import json import yaml -from typing import 
List, Dict, Callable, Tuple, Union -import nmdc_schema.nmdc as nmdc + +from typing import List, Dict, Union, Tuple +from nmdc_schema import nmdc + + from linkml_runtime.dumpers import json_dumper from nmdc_automation.api import NmdcRuntimeApi from .utils import object_action, file_link, get_md5, filter_import_by_type @@ -20,7 +23,7 @@ def __init__( self, iteration, file_list: List[Union[str, Path]], - omics_id: str, + nucelotide_sequencing_id: str, yaml_file: Union[str, Path], project_directory: Union[str, Path], site_config_file: Union[str, Path], @@ -30,7 +33,7 @@ def __init__( Args: file_list: List of file paths to be processed. - omics_id: Identifier for the omics data. + nucelotide_sequencing_id: Identifier for the omics data. yaml_file: File path of the yaml file containing import data. root_directory: Root directory path. project_directory: Project directory path. @@ -42,15 +45,15 @@ def __init__( self.nmdc_db = nmdc.Database() self.iteration = iteration self.file_list = file_list - self.omics_id = omics_id + self.nucelotide_sequencing_id = nucelotide_sequencing_id self.root_dir = os.path.join( - self.import_data["Workflow Metadata"]["Root Directory"], omics_id + self.import_data["Workflow Metadata"]["Root Directory"], nucelotide_sequencing_id ) self.project_dir = project_directory self.url = self.import_data["Workflow Metadata"]["Source URL"] self.data_object_type = "nmdc:DataObject" self.objects = {} - self.activity_ids = {} + self.workflow_execution_ids = {} self.workflows_by_type = {} self.runtime = NmdcRuntimeApi(site_config_file) @@ -61,7 +64,7 @@ def __init__( def unique_object_mapper(self) -> None: """ Map unique data objects from the file list based on unique matching import suffix. - The method relates each object to an activity ID and updates the file with object action. + The method relates each object to an workflow execution ID and updates the file with object action. 
It updates the nmdc database with the DataObject and stores the information in the objects dictionary. """ @@ -79,19 +82,19 @@ def unique_object_mapper(self) -> None: continue elif re.search(data_object_dict["import_suffix"], file): - activity_id = self.get_activity_id(data_object_dict["output_of"]) + workflow_execution_id = self.get_workflow_execution_id(data_object_dict["output_of"]) file_destination_name = object_action( file, data_object_dict["action"], - activity_id, + workflow_execution_id, data_object_dict["nmdc_suffix"], ) - activity_dir = os.path.join(self.root_dir, activity_id) + workflow_execution_dir = os.path.join(self.root_dir, workflow_execution_id) updated_file = file_link( - self.project_dir, file, activity_dir, file_destination_name + self.project_dir, file, workflow_execution_dir, file_destination_name ) filemeta = os.stat(updated_file) @@ -104,13 +107,13 @@ def unique_object_mapper(self) -> None: nmdc.DataObject( file_size_bytes=filemeta.st_size, name=file_destination_name, - url=f"{self.url}/{self.omics_id}/{activity_id}/{file_destination_name}", + url=f"{self.url}/{self.nucelotide_sequencing_id}/{workflow_execution_id}/{file_destination_name}", data_object_type=data_object_dict["data_object_type"], type=self.data_object_type, id=dobj, md5_checksum=md5, description=data_object_dict["description"].replace( - "{id}", self.omics_id + "{id}", self.nucelotide_sequencing_id ), ) ) @@ -123,7 +126,7 @@ def unique_object_mapper(self) -> None: def multiple_objects_mapper(self) -> None: """ Maps multiple data objects from the file list based on matching import suffix into one nmdc data object. - The method relates each object to an activity ID and updates the file with object action. + The method relates each object to an workflow execution ID and updates the file with object action. It updates the nmdc database with the DataObject and stores the information in the objects dictionary. 
""" @@ -135,23 +138,23 @@ def multiple_objects_mapper(self) -> None: if re.search(data_object_dict["import_suffix"], file): multiple_objects_list.append(file) - activity_id = self.get_activity_id(data_object_dict["output_of"]) + workflow_execution_id = self.get_workflow_execution_id(data_object_dict["output_of"]) - activity_dir = os.path.join(self.root_dir, activity_id) + workflow_execution_dir = os.path.join(self.root_dir, workflow_execution_id) file_destination_name = object_action( multiple_objects_list, data_object_dict["action"], - activity_id, + workflow_execution_id, data_object_dict["nmdc_suffix"], - activity_dir=activity_dir, + workflow_execution_dir=workflow_execution_dir, multiple=True, ) updated_file = file_link( self.project_dir, multiple_objects_list, - activity_dir, + workflow_execution_dir, file_destination_name, ) @@ -165,13 +168,13 @@ def multiple_objects_mapper(self) -> None: nmdc.DataObject( file_size_bytes=filemeta.st_size, name=data_object_dict["name"], - url=f"{self.url}/{self.omics_id}/{activity_dir}/{file_destination_name}", + url=f"{self.url}/{self.nucelotide_sequencing_id}/{workflow_execution_id}/{file_destination_name}", data_object_type=data_object_dict["data_object_type"], type=self.data_object_type, id=dobj, md5_checksum=md5, description=data_object_dict["description"].replace( - "{id}", self.omics_id + "{id}", self.nucelotide_sequencing_id ), ) ) @@ -182,11 +185,11 @@ def multiple_objects_mapper(self) -> None: dobj, ) - def activity_mapper(self) -> None: + def workflow_execution_mapper(self) -> None: """ - Maps activities from the import data to the NMDC database. - The function creates a database activity set for each workflow type in the import data, - attaching the relevant input and output objects. It also provides other metadata for each activity. + Maps workflow executions from the import data to the NMDC database. 
+ The function creates a database workflow execution set for each workflow type in the import data, + attaching the relevant input and output objects. It also provides other metadata for each workflow execution. This method assumes that the import data includes a 'Workflows' section with each workflow having a 'Type', 'Git_repo', and 'Version'. It also assumes that the import data includes a 'Workflow Metadata' @@ -197,7 +200,7 @@ def activity_mapper(self) -> None: if not workflow.get("Import"): continue logging.info(f"Processing {workflow['Name']}") - has_inputs_list, has_output_list = self.attach_objects_to_activity( + has_inputs_list, has_output_list = self.attach_objects_to_workflow_execution( workflow["Type"] ) # quick fix because nmdc-schema does not support [], even though raw product has none @@ -208,18 +211,17 @@ def activity_mapper(self) -> None: if len(has_inputs_list) == 0: has_inputs_list = ["None"] # Lookup the nmdc database class - database_activity_set = getattr(self.nmdc_db, workflow["Collection"]) + database_workflow_execution_set = getattr(self.nmdc_db, workflow["Collection"]) # Lookup the nmdc schema range class - database_activity_range = getattr(nmdc, workflow["ActivityRange"]) + database_workflow_execution_range = getattr(nmdc, workflow["WorkflowExecutionRange"]) # Mint an ID - activity_id = self.get_activity_id(workflow["Type"]) - database_activity_set.append( - database_activity_range( - id=activity_id, - name=workflow["Activity"]["name"].replace("{id}", activity_id), + workflow_execution_id = self.get_workflow_execution_id(workflow["Type"]) + database_workflow_execution_set.append( + database_workflow_execution_range( + id=workflow_execution_id, + name=workflow["Workflow_Execution"]["name"].replace("{id}", workflow_execution_id), git_url=workflow["Git_repo"], version=workflow["Version"], - part_of=[self.omics_id], execution_resource=self.import_data["Workflow Metadata"][ "Execution Resource" ], @@ -228,43 +230,43 @@ def 
activity_mapper(self) -> None: has_output=has_output_list, type=workflow["Type"], ended_at_time=datetime.datetime.now(pytz.utc).isoformat(), - was_informed_by=self.omics_id, + was_informed_by=self.nucelotide_sequencing_id, ) ) - def get_activity_id(self, output_of: str) -> str: - """Lookup and returns minted activity id + def get_workflow_execution_id(self, output_of: str) -> str: + """Lookup and returns minted workflow execution id Args: - output_of (str): The activity type the data object is an output of. + output_of (str): The workflow execution type the data object is an output of. Returns: - str: The activity id for this workflow type. + str: The workflow execution id for this workflow type. """ - if output_of not in self.activity_ids: + if output_of not in self.workflow_execution_ids: wf = self.workflows_by_type[output_of] id = self.runtime.minter(wf["Type"]) + "." + self.iteration - self.activity_ids[output_of] = id + self.workflow_execution_ids[output_of] = id return id - return self.activity_ids[output_of] + return self.workflow_execution_ids[output_of] - def attach_objects_to_activity( - self, activity_type: str + def attach_objects_to_workflow_execution( + self, workflow_execution_type: str ) -> Tuple[List[str], List[str]]: """ - Get data objects that inform activity inputs and outputs. + Get data objects that inform workflow execution inputs and outputs. - This function iterates through the stored objects, checking if the provided activity_type + This function iterates through the stored objects, checking if the provided workflow_execution_type is in the 'input_to' or 'output_of' fields. If it is, the corresponding object is appended to the respective list (inputs or outputs). Args: - activity_type (str): The type of nmdc activity to relate object to. + workflow_execution_type (str): The type of nmdc workflow execution to relate object to. 
Returns: Tuple[List[str], List[str]]: Two lists containing the data object ids of the data objects that are inputs to and outputs of the specified - activity type. + workflow execution type. """ data_object_outputs_of_list = [] @@ -272,9 +274,9 @@ def attach_objects_to_activity( data_object_inputs_to_list = [] for _, data_object_items in self.objects.items(): - if activity_type in data_object_items[1]: + if workflow_execution_type in data_object_items[1]: data_object_outputs_of_list.append(data_object_items[2]) - elif activity_type in data_object_items[0]: + elif workflow_execution_type in data_object_items[0]: data_object_inputs_to_list.append(data_object_items[2]) return data_object_inputs_to_list, data_object_outputs_of_list diff --git a/nmdc_automation/import_automation/utils.py b/nmdc_automation/import_automation/utils.py index 696d9f3d..73007e3c 100644 --- a/nmdc_automation/import_automation/utils.py +++ b/nmdc_automation/import_automation/utils.py @@ -3,6 +3,7 @@ import logging import hashlib import os +from pathlib import Path logger = logging.getLogger(__name__) @@ -10,9 +11,9 @@ def object_action( file_s: Union[str, List[str]], action: str, - activity_id: str, + workflow_execution_id: str, nmdc_suffix: str, - activity_dir: str = None, + workflow_execution_dir: Union[str, Path] = None, multiple: bool = False, ) -> str: """ @@ -21,9 +22,9 @@ def object_action( Args: file_s (Union[str, List[str]]): The object or list of objects to perform the action on. action (str): The action to perform. Possible values are 'none', 'rename', or 'zip'. - activity_id (str): The activity ID associated with the object. + workflow_execution_id (str): The workflow execution subclass ID associated with the object. nmdc_suffix (str): The NMDC suffix. - activity_dir (str, optional): The directory where the activity is located. Defaults to None. + workflow_execution_dir (str or Path, optional): The directory where the workflow execution subclass is located. Defaults to None. 
multiple (bool, optional): Indicates if multiple files are involved. Defaults to False. Returns: @@ -34,12 +35,12 @@ def object_action( if action == "none": return get_basename(file_s) elif action == "rename": - return rename(activity_id, nmdc_suffix) + return rename(workflow_execution_id, nmdc_suffix) elif action == "zip": if multiple: zip_names = [] for file in file_s: - zip_name = zip_file(activity_id, nmdc_suffix, file, activity_dir) + zip_name = zip_file(workflow_execution_id, nmdc_suffix, file, workflow_execution_dir) zip_names.append(zip_name) return zip_names[0] else: @@ -62,30 +63,30 @@ def get_basename(file: str) -> str: return os.path.basename(file) -def rename(activity_id: str, nmdc_suffix: str) -> str: +def rename(workflow_execution_id: str, nmdc_suffix: str) -> str: """ - Renames file to target nmdc target activity name + Renames file to target nmdc target workflow execution name Args: - activity_id (str): activity id for corresponding data object + workflow_execution_id (str): workflow execution id for corresponding data object nmdc_suffix (str): expected target suffix Returns: str: nmdc file name """ - activity_file_id = activity_id.replace(":", "_") + workflow_execution_file_id = workflow_execution_id.replace(":", "_") - nmdc_file_name = activity_file_id + nmdc_suffix + nmdc_file_name = workflow_execution_file_id + nmdc_suffix return nmdc_file_name -def zip_file(activity_id: str, nmdc_suffix: str, file: str, project_dir: str): +def zip_file(workflow_execution_id: str, nmdc_suffix: str, file: str, project_dir: str): """Add files of type Multiples to a zip file and represent as one data object Args: - activity_id (str): The activity ID associated with the object. + workflow_execution_id (str): The activity ID associated with the object. nmdc_suffix (str): The NMDC suffix. file (str): The file associated with objects of type Multiples. project_dir (str, optional): The directory where the activity is located. 
@@ -95,7 +96,7 @@ def zip_file(activity_id: str, nmdc_suffix: str, file: str, project_dir: str): """ - zip_file_name = rename(activity_id, nmdc_suffix) + zip_file_name = rename(workflow_execution_id, nmdc_suffix) if not os.path.exists(os.path.join(project_dir, zip_file_name)): if not os.path.exists(project_dir): @@ -180,7 +181,7 @@ def filter_import_by_type(workflow_data: dict, nmdc_type: str) -> dict: Args: workflow_data (dict): Workflows - nmdc_type (str): nmdc:xxxxxActivity + nmdc_type (str): nmdc:xxxxxWorkflowExecution Returns: dict: Filtered workflows diff --git a/nmdc_automation/run_process/run_workflows.py b/nmdc_automation/run_process/run_workflows.py index 5038f45c..401fa2a9 100644 --- a/nmdc_automation/run_process/run_workflows.py +++ b/nmdc_automation/run_process/run_workflows.py @@ -35,7 +35,7 @@ def watcher(ctx, site_configuration_file): @click.argument("job_ids", nargs=-1) def submit(ctx, job_ids): watcher = ctx.obj - watcher.restore() + watcher.restore_from_checkpoint() for job_id in job_ids: job = watcher.nmdc.get_job(job_id) claims = job["claims"] @@ -57,7 +57,7 @@ def submit(ctx, job_ids): @click.argument("activity_ids", nargs=-1) def resubmit(ctx, activity_ids): watcher = ctx.obj - watcher.restore() + watcher.restore_from_checkpoint() for act_id in activity_ids: job = None if act_id.startswith("nmdc:sys"): @@ -65,7 +65,7 @@ def resubmit(ctx, activity_ids): else: key = "activity_id" for found_job in watcher.jobs: - job_record = found_job.get_state() + job_record = found_job.state() if job_record[key] == act_id: job = found_job break @@ -83,7 +83,7 @@ def resubmit(ctx, activity_ids): @click.pass_context def sync(ctx): watcher = ctx.obj - watcher.restore() + watcher.restore_from_checkpoint() watcher.update_op_state_all() diff --git a/nmdc_automation/workflow_automation/__init__.py b/nmdc_automation/workflow_automation/__init__.py index eb6d68c4..2fb3edc5 100644 --- a/nmdc_automation/workflow_automation/__init__.py +++ 
b/nmdc_automation/workflow_automation/__init__.py @@ -1,4 +1,2 @@ from .watch_nmdc import Watcher -from .workflows import load_workflows, Workflow -from .activities import load_activities, Activity -from .wfutils import WorkflowJob, NmdcSchema +from .workflows import load_workflow_configs diff --git a/nmdc_automation/workflow_automation/activities.py b/nmdc_automation/workflow_automation/activities.py deleted file mode 100644 index d0c57fca..00000000 --- a/nmdc_automation/workflow_automation/activities.py +++ /dev/null @@ -1,317 +0,0 @@ -import logging -from functools import lru_cache -from typing import List - -from semver.version import Version - -from .workflows import Workflow - -# TODO: Berkley refactoring: -# The load_activities method will need to be modified to handle DataGeneration objects -# instead of OmicsProcessing objects, with the difference being the DataGeneration objects can be part_of other -# DataGeneration objects. This will require a change in the way the parent/child relationships are resolved. -# Need to add logic to find the correct parent DataGeneration to use for constructing the Activity graph and -# correctly setting the was_informed_by field. -# Add unit tests to cover the new behavior, mocking the MongoDB database and the Berkley style DataGeneration objects. -# DataGeneration is an abstract class, include specific tests for subclasses NucleotideSequencing or MassSpectrometry - -warned_objects = set() - - -def get_required_data_objects_map(db, workflows: List[Workflow]) -> dict: - """ - Search for all the data objects that are required data object types for the workflows, - and return a dictionary of data objects by ID. - - TODO: In the future this will probably need to be redone - since the number of data objects could get very large. 
- """ - - # Build up a filter of what types are used - required_types = set() - for wf in workflows: - required_types.update(set(wf.do_types)) - - required_data_objs_by_id = dict() - for rec in db.data_object_set.find(): - do = DataObject(rec) - if do.data_object_type not in required_types: - continue - required_data_objs_by_id[do.id] = do - return required_data_objs_by_id - - -@lru_cache -def _within_range(ver1: str, ver2: str) -> bool: - """ - Determine if two workflows are within a major and minor - version of each other. - """ - - def get_version(version): - v_string = version.lstrip("b").lstrip("v").rstrip("-beta") - return Version.parse(v_string) - - v1 = get_version(ver1) - v2 = get_version(ver2) - if v1.major == v2.major and v1.minor == v2.minor: - return True - return False - - -def _check(match_types, data_object_ids, data_objs): - """ - This iterates through a list of data objects and - checks the type against the match types. - """ - if not data_object_ids: - return False - if not match_types or len(match_types) == 0: - return True - match_set = set(match_types) - do_types = set() - for doid in data_object_ids: - if doid in data_objs: - do_types.add(data_objs[doid].data_object_type) - return match_set.issubset(do_types) - - -def _is_missing_required_input_output(wf, rec, data_objs): - """ - Some workflows require specific inputs or outputs. This - implements the filtering for those. - """ - match_in = _check( - wf.filter_input_objects, rec.get("has_input"), data_objs - ) - match_out = _check( - wf.filter_output_objects, rec.get("has_output"), data_objs - ) - return not (match_in and match_out) - - -def get_workflow_executions(db, workflows: List[Workflow], data_objects: dict, allowlist: set): - """ - Fetch the relevant workflow executions from the database for the given workflows. - 1. Get the Data Generation (formerly Omics Processing) objects for the workflows by analyte category. - 2. 
Get the remaining Workflow Execution objects that was_informed_by the Data Generation objects. - 3. Filter Workflow Execution objects by: - - version (within range) - - required input and output data objects - Return the list of Workflow Execution objects. - """ - workflow_executions = [] - analyte_category = _determine_analyte_category(workflows) - - # We handle the data generation and data processing workflows separately. Data generation workflow executions have an - # analyte category field, while data processing workflow executions do not, so we filter by the was_informed_by field. - data_generation_ids = set() - dg_workflows = [wf for wf in workflows if wf.collection in ["omics_processing_set", "data_generation_set"]] - dp_workflows = [wf for wf in workflows if not wf.collection in ["omics_processing_set", "data_generation_set"]] - - # Berkley - # workflow_execution_records = db["data_generation_set].find({"analyte_category": analyte_category}) - # default query - q = {"omics_type.has_raw_value": {"$regex": analyte_category, "$options": "i"}} - # override query with allowlist - if allowlist: - q["id"] = {"$in": list(allowlist)} - dg_execution_records = db["omics_processing_set"].find(q) - # change from cursor to list - dg_execution_records = list(dg_execution_records) - - for wf in dg_workflows: - for rec in dg_execution_records: - if _is_missing_required_input_output(wf, rec, data_objects): - continue - data_generation_ids.add(rec["id"]) - act = Activity(rec, wf) - workflow_executions.append(act) - - for wf in dp_workflows: - q = {} - if wf.git_repo: - q = {"git_url": wf.git_repo} - # override query with allowlist - if allowlist: - q = {"was_informed_by": {"$in": list(allowlist)}} - - records = db[wf.collection].find(q) - for rec in records: - if wf.version and not _within_range(rec["version"], wf.version): - continue - if _is_missing_required_input_output(wf, rec, data_objects): - continue - if rec["was_informed_by"] in data_generation_ids: - act = 
Activity(rec, wf) - workflow_executions.append(act) - - return workflow_executions - - -def _determine_analyte_category(workflows: List[Workflow]) -> str: - analyte_categories = set([wf.analyte_category for wf in workflows]) - if len(analyte_categories) > 1: - raise ValueError("Multiple analyte categories not supported") - elif len(analyte_categories) == 0: - raise ValueError("No analyte category found") - analyte_category = analyte_categories.pop() - return analyte_category - - -# TODO: Make public, give a better name, add type hints and unit tests. -def _resolve_relationships(activities, data_obj_act): - """ - Find the parents and children relationships - between the activities - """ - # We now have a list of all the activites and - # a map of all of the data objects they generated. - # Let's use this to find the parent activity - # for each child activity - for act in activities: - logging.debug(f"Processing {act.id} {act.name} {act.workflow.name}") - act_pred_wfs = act.workflow.parents - if not act_pred_wfs: - logging.debug("- No Predecessors") - continue - # Go through its inputs - for do_id in act.has_input: - if do_id not in data_obj_act: - # This really shouldn't happen - if do_id not in warned_objects: - logging.warning(f"Missing data object {do_id}") - warned_objects.add(do_id) - continue - parent_act = data_obj_act[do_id] - # This is to cover the case where it was a duplicate. - # This shouldn't happen in the future. - if not parent_act: - logging.warning("Parent act is none") - continue - # Let's make sure these came from the same source - # This is just a safeguard - if act.was_informed_by != parent_act.was_informed_by: - logging.warning( - "Mismatched informed by for " - f"{do_id} in {act.id} " - f"{act.was_informed_by} != " - f"{parent_act.was_informed_by}" - ) - continue - # We only want to use it as a parent if it is the right - # parent workflow. 
Some inputs may come from ancestors - # further up - if parent_act.workflow in act_pred_wfs: - # This is the one - act.parent = parent_act - parent_act.children.append(act) - logging.debug( - f"Found parent: {parent_act.id}" - f" {parent_act.name}" - ) - break - if len(act.workflow.parents) > 0 and not act.parent: - if act.id not in warned_objects: - logging.warning(f"Didn't find a parent for {act.id}") - warned_objects.add(act.id) - # Now all the activities have their parent - return activities - - -def _find_data_object_activities(activities, data_objs_by_id): - """ - Find the activity that generated each data object to - use in the relationship method. - """ - data_obj_act = dict() - for act in activities: - for do_id in act.has_output: - if do_id in data_objs_by_id: - do = data_objs_by_id[do_id] - act.add_data_object(do) - # If its a dupe, set it to none - # so we can ignore it later. - # Once we re-id the data objects this - # shouldn't happen - if do_id in data_obj_act: - if do_id not in warned_objects: - logging.warning(f"Duplicate output object {do_id}") - warned_objects.add(do_id) - data_obj_act[do_id] = None - else: - data_obj_act[do_id] = act - return data_obj_act - - -# TODO: Give a better name, add unit tests. -# This function builds up the graph of related parent / child Execution objects and is -# key to the behavior of workflow automation. -def load_activities(db, workflows: list[Workflow], allowlist: set = set()): - """ - This reads the activities from Mongo. It also - finds the parent and child relationships between - the activities using the has_output and has_input - to connect things. - - Finally it creates a map of data objects by type - for each activity. - - Inputs: - db: mongo database - workflow: workflow - """ - - # This is map from the data object ID to the activity - # that created it. 
- data_objs_by_id = get_required_data_objects_map(db, workflows) - - # Build up a set of relevant activities and a map from - # the output objects to the activity that generated them. - workflow_executions = get_workflow_executions(db, workflows, data_objs_by_id, allowlist) - - data_obj_act = _find_data_object_activities(workflow_executions, data_objs_by_id) - - # Now populate the parent and children values for the - # activities - _resolve_relationships(workflow_executions, data_obj_act) - return workflow_executions - - -# TODO: Why are we not importing and using the existing nmdc_schema.DataObject class? -# nmdc_schema.DataObject is stricter and using it currently causes tests / fixtures to fail. -# We should fix the tests and fixtures to use the stricter class and remove this class. -class DataObject(object): - """ - Data Object Class - """ - - _FIELDS = ["id", "name", "description", "url", "md5_checksum", "file_size_bytes", "data_object_type", ] - - def __init__(self, rec: dict): - for f in self._FIELDS: - setattr(self, f, rec.get(f)) - - -# TODO: Give a better 'Execution' based name, expand docstring, and make sure it is covered by unit tests. -# This class represents a network of related WorkflowExecution objects and their associated DataObject objects. 
-class Activity(object): - """ - Activity Object Class - """ - - _FIELDS = ["id", "name", "git_url", "version", "has_input", "has_output", "was_informed_by", "type", ] - - def __init__(self, activity_rec: dict, wf: Workflow): - self.parent = None - self.children = [] - self.data_objects_by_type = dict() - self.workflow = wf - for f in self._FIELDS: - setattr(self, f, activity_rec.get(f)) - # TODO the analogous Berkeley Schema type will be nmdc:DataGeneration - if self.type == "nmdc:OmicsProcessing": - self.was_informed_by = self.id - - def add_data_object(self, do: DataObject): - self.data_objects_by_type[do.data_object_type] = do diff --git a/nmdc_automation/workflow_automation/models.py b/nmdc_automation/workflow_automation/models.py new file mode 100644 index 00000000..0db245ab --- /dev/null +++ b/nmdc_automation/workflow_automation/models.py @@ -0,0 +1,316 @@ +""" Model classes for the workflow automation app. """ +from dataclasses import dataclass, field +from dateutil import parser +from datetime import datetime +from typing import List, Dict, Any, Optional, Set, Union + +from nmdc_schema.nmdc import ( + DataGeneration, + FileTypeEnum, + NucleotideSequencing, + MagsAnalysis, + MetagenomeAssembly, + MetagenomeAnnotation, + MetatranscriptomeAssembly, + MetatranscriptomeAnnotation, + MetatranscriptomeExpressionAnalysis, + ReadBasedTaxonomyAnalysis, + ReadQcAnalysis, + WorkflowExecution +) +from nmdc_schema import nmdc + + +def workflow_process_factory(record: Dict[str, Any]) -> Union[DataGeneration, WorkflowExecution]: + """ + Factory function to create a PlannedProcess subclass object from a record. + Subclasses are determined by the "type" field in the record, and can be + either a WorkflowExecution or DataGeneration object. 
+ """ + process_types = { + "nmdc:MagsAnalysis": MagsAnalysis, + "nmdc:MetagenomeAnnotation": MetagenomeAnnotation, + "nmdc:MetagenomeAssembly": MetagenomeAssembly, + "nmdc:MetatranscriptomeAnnotation": MetatranscriptomeAnnotation, + "nmdc:MetatranscriptomeAssembly": MetatranscriptomeAssembly, + "nmdc:MetatranscriptomeExpressionAnalysis": MetatranscriptomeExpressionAnalysis, + "nmdc:NucleotideSequencing": NucleotideSequencing, + "nmdc:ReadBasedTaxonomyAnalysis": ReadBasedTaxonomyAnalysis, + "nmdc:ReadQcAnalysis": ReadQcAnalysis, + } + record = _normalize_record(record) + + try: + cls = process_types[record["type"]] + except KeyError: + raise ValueError(f"Invalid workflow execution type: {record['type']}") + wfe = cls(**record) + return wfe + +def _normalize_record(record: Dict[str, Any]) -> Dict[str, Any]: + """ Normalize the record by removing the _id field and converting the type field to a string """ + record.pop("_id", None) + # for backwards compatibility strip Activity from the end of the type + record["type"] = record["type"].replace("Activity", "") + normalized_record = _strip_empty_values(record) + + # type-specific normalization + if normalized_record["type"] == "nmdc:MagsAnalysis": + normalized_record = _normalize_mags_record(normalized_record) + + return normalized_record + +def _normalize_mags_record(record: Dict[str, Any]) -> Dict[str, Any]: + """ Normalize the record for a MagsAnalysis object """ + for i, mag in enumerate(record.get("mags_list", [])): + if not mag.get("type"): + # Update the original dictionary in the list + record["mags_list"][i]["type"] = "nmdc:MagBin" + # for backwards compatibility normalize num_tRNA to num_t_rna + if "num_tRNA" in mag: + record["mags_list"][i]["num_t_rna"] = mag.pop("num_tRNA") + # add type to eukaryotic_evaluation if it exists + if "eukaryotic_evaluation" in mag: + record["mags_list"][i]["eukaryotic_evaluation"]["type"] = "nmdc:EukEval" + return record + + +def _strip_empty_values(d: Dict[str, Any]) -> 
Dict[str, Any]: + """ Strip empty values from a record """ + empty_values = [None, "", [], "null", 0] + def clean_dict(d): + if isinstance(d, dict): + return {k: clean_dict(v) for k, v in d.items() if v not in empty_values} + elif isinstance(d, list): + return [clean_dict(v) for v in d if v not in empty_values] + return d + return clean_dict(d) + + +class WorkflowProcessNode(object): + """ + Class to represent a workflow processing node. This is a node in a tree + structure that represents the tree of data generation and + workflow execution objects with their associated data objects. + """ + def __init__(self, record: Dict[str, Any], workflow: "WorkflowConfig"): + self.parent = None + self.children = [] + self.data_objects_by_type = {} + self.workflow = workflow + process = workflow_process_factory(record) + self.process = process + + def __hash__(self): + return hash((self.id, self.type)) + + def __eq__(self, other): + return self.id == other.id and self.type == other.type + + def add_data_object(self, data_object): + self.data_objects_by_type[data_object.data_object_type] = data_object + + @property + def id(self): + return self.process.id + + @property + def type(self): + return self.process.type + + @property + def name(self): + return self.process.name + + @property + def has_input(self): + return self.process.has_input + + @property + def has_output(self): + return self.process.has_output + + @property + def git_url(self): + """ workflow executions have a git_url field, data generations do not""" + return getattr(self.process, "git_url", None) + + @property + def version(self): + """ workflow executions have a version field, data generations do not""" + return getattr(self.process, "version", None) + + @property + def analyte_category(self): + """ data generations have an analyte_category field, workflow executions do not""" + return getattr(self.process, "analyte_category", None) + + @property + def was_informed_by(self): + """ workflow executions have a 
was_informed_by field, data generations get set to their own id""" + return getattr(self.process, "was_informed_by", self.id) + + +class DataObject(nmdc.DataObject): + """ + Extends the NMDC DataObject dataclass with additional methods for serialization. + """ + def __init__(self, **record): + """ Initialize the object from a dictionary """ + # _id is a MongoDB field that makes the parent class fail to initialize + record.pop("_id", None) + if "type" not in record: + record["type"] = "nmdc:DataObject" + super().__init__(**record) + + def as_dict(self): + """ Return the object as a dictionary, excluding None values, empty lists, and data_object_type as a string """ + return { + key: value + for key, value in self.__dict__.items() + if not key.startswith("_") and value + } | {"data_object_type": self.data_object_type} + + @property + def data_object_type(self): + """ Return the data object type as a string """ + if isinstance(self._data_object_type, FileTypeEnum): + return self._data_object_type.code.text + return str(self._data_object_type) + + @data_object_type.setter + def data_object_type(self, value): + """ Set the data object type from a string or FileTypeEnum """ + if isinstance(value, FileTypeEnum): + self._data_object_type = value + else: + self._data_object_type = FileTypeEnum(value) + + +@dataclass +class WorkflowConfig: + """ Configuration for a workflow execution. Defined by .yaml files in nmdc_automation/config/workflows """ + # Sequencing workflows only have these fields + name: str + collection: str + enabled: bool + analyte_category: str + filter_output_objects: List[str] + # TODO should type be optional? 
+ type: Optional[str] = None + + # workflow repository information + git_repo: Optional[str] = None + version: Optional[str] = None + wdl: Optional[str] = None + # workflow execution and input / output information + filter_output_objects: List[str] = field(default_factory=list) + predecessors: List[str] = field(default_factory=list) + filter_input_objects: List[str] = field(default_factory=list) + input_prefix: str = None + inputs: Dict[str, str] = field(default_factory=dict) + optional_inputs: List[str] = field(default_factory=list) + workflow_execution: Dict[str, Any] = field(default_factory=dict) + outputs: List[Dict[str, str]] = field(default_factory=list) + + # populated after initialization + children: Set["WorkflowConfig"] = field(default_factory=set) + parents: Set["WorkflowConfig"] = field(default_factory=set) + data_object_types: List[str] = field(default_factory=list) + + def __post_init__(self): + """ Initialize the object """ + for _, inp_param in self.inputs.items(): + if inp_param.startswith("do:"): + self.data_object_types.append(inp_param[3:]) + if not self.type: + # Infer the type from the name + if self.collection == 'data_generation_set' and 'Sequencing' in self.name: + self.type = 'nmdc:NucleotideSequencing' + + def __hash__(self): + return hash(self.name) + + def __eq__(self, other): + return self.name == other.name + + + def add_child(self, child: "WorkflowConfig"): + """ Add a child workflow """ + self.children.add(child) + + def add_parent(self, parent: "WorkflowConfig"): + """ Add a parent workflow """ + self.parents.add(parent) + + +@dataclass +class JobWorkflow: + id: str + +@dataclass +class JobConfig: + """ Represents a job configuration from the NMDC API jobs endpoint / MongoDB jobs collection """ + git_repo: str + release: str + wdl: str + activity_id: str + activity_set: str + was_informed_by: str + trigger_activity: str + iteration: int + input_prefix: str + inputs: Dict[str, str] + input_data_objects: List[DataObject] + activity: 
Dict[str, str] + outputs: List[Dict[str, str]] + + +@dataclass +class JobClaim: + op_id: str + site_id: str + +@dataclass +class JobOutput: + """ Represents a job output specification. """ + output: str + data_object: DataObject = field(init=False) + + # Raw fields that will map to DataObject fields + data_object_type: str + description: Optional[str] + name: str + id: str + + def __post_init__(self): + """ Initialize the object """ + self.data_object = DataObject( + id=self.id, + name=self.name, + data_object_type=self.data_object_type, + description=self.description, + ) + +@dataclass +class Job: + """ Represents a job from the NMDC API jobs endpoint / MongoDB jobs collection """ + id: str + workflow: JobWorkflow + config: JobConfig + created_at: Optional[datetime] = field(default=None) + claims: List[JobClaim] = field(default_factory=list) + + def __post_init__(self): + """ If created_at is a string, convert it to a datetime object """ + if isinstance(self.created_at, str): + self.created_at = parser.isoparse(self.created_at) + + if isinstance(self.workflow, dict): + self.workflow = JobWorkflow(**self.workflow) + + if isinstance(self.config, dict): + self.config = JobConfig(**self.config) + + if isinstance(self.claims, list): + self.claims = [JobClaim(**claim) for claim in self.claims] diff --git a/nmdc_automation/workflow_automation/sched.py b/nmdc_automation/workflow_automation/sched.py index 98cd8a2e..08540658 100644 --- a/nmdc_automation/workflow_automation/sched.py +++ b/nmdc_automation/workflow_automation/sched.py @@ -5,11 +5,12 @@ import os from time import sleep as _sleep from nmdc_automation.api.nmdcapi import NmdcRuntimeApi -from nmdc_automation.workflow_automation.workflows import load_workflows, Workflow +from nmdc_automation.workflow_automation.workflows import load_workflow_configs from functools import lru_cache from pymongo import MongoClient from pymongo.database import Database as MongoDatabase -from 
nmdc_automation.workflow_automation.activities import load_activities, Activity +from nmdc_automation.workflow_automation.workflow_process import load_workflow_process_nodes +from nmdc_automation.workflow_automation.models import WorkflowProcessNode, WorkflowConfig from semver.version import Version @@ -41,7 +42,7 @@ def get_mongo_db() -> MongoDatabase: return _client[os.getenv("MONGO_DBNAME")] -def within_range(wf1: Workflow, wf2: Workflow, force=False) -> bool: +def within_range(wf1: WorkflowConfig, wf2: WorkflowConfig, force=False) -> bool: """ Determine if two workflows are within a major and minor version of each other. @@ -69,12 +70,12 @@ def get_version(wf): """ # TODO: Change the name of this to distinguish it from the database Job object -class Job: +class SchedulerJob: """ Class to hold information for new jobs """ - def __init__(self, workflow: Workflow, trigger_act: str): + def __init__(self, workflow: WorkflowConfig, trigger_act: WorkflowProcessNode): self.workflow = workflow self.trigger_act = trigger_act self.informed_by = trigger_act.was_informed_by @@ -82,21 +83,13 @@ def __init__(self, workflow: Workflow, trigger_act: str): class Scheduler: - # TODO: Get this from the config - _sets = [ - "metagenome_annotation_activity_set", - "metagenome_assembly_set", - "read_qc_analysis_activity_set", - "mags_activity_set", - "read_based_analysis_activity_set", - ] def __init__(self, db, wfn="workflows.yaml", site_conf="site_configuration.toml"): logging.info("Initializing Scheduler") # Init wf_file = os.environ.get(_WF_YAML_ENV, wfn) - self.workflows = load_workflows(wf_file) + self.workflows = load_workflow_configs(wf_file) self.db = db self.api = NmdcRuntimeApi(site_conf) # TODO: Make force a optional parameter @@ -111,8 +104,7 @@ async def run(self): self.cycle() await asyncio.sleep(_POLL_INTERVAL) - # TODO: - def add_job_rec(self, job: Job): + def create_job_rec(self, job: SchedulerJob): """ This takes a job and using the workflow definition, resolves all 
the information needed to create a @@ -122,11 +114,11 @@ def add_job_rec(self, job: Job): next_act = job.trigger_act do_by_type = dict() while next_act: - for do_type, val in next_act.data_objects_by_type.items(): + for do_type, data_object in next_act.data_objects_by_type.items(): if do_type in do_by_type: - logging.debug(f"Ignoring Duplicate type: {do_type} {val.id} {next_act.id}") + logging.debug(f"Ignoring Duplicate type: {do_type} {data_object.id} {next_act.id}") continue - do_by_type[do_type] = val.__dict__ + do_by_type[do_type] = data_object # do_by_type.update(next_act.data_objects_by_type.__dict__) next_act = next_act.parent @@ -144,7 +136,7 @@ def add_job_rec(self, job: Job): if k in optional_inputs: continue raise ValueError(f"Unable to find {do_type} in {do_by_type}") - inp_objects.append(dobj) + inp_objects.append(dobj.as_dict()) v = dobj["url"] # TODO: Make this smarter elif v == "{was_informed_by}": @@ -170,8 +162,8 @@ def add_job_rec(self, job: Job): "inputs": inp, "input_data_objects": inp_objects, } - if wf.activity: - job_config["activity"] = wf.activity + if wf.workflow_execution: + job_config["activity"] = wf.workflow_execution if wf.outputs: outputs = [] for output in wf.outputs: @@ -187,7 +179,7 @@ def add_job_rec(self, job: Job): "config": job_config, "claims": [], } - self.db.jobs.insert_one(jr) + logging.info(f'JOB RECORD: {jr["id"]}') # This would make the job record # print(json.dumps(ji, indent=2)) @@ -216,7 +208,7 @@ def mock_mint(self, id_type): # pragma: no cover } return f"nmdc:wf{mapping[id_type]}-11-xxxxxx" - def get_activity_id(self, wf: Workflow, informed_by: str): + def get_activity_id(self, wf: WorkflowConfig, informed_by: str): """ See if anything exist for this and if not mint a new id. @@ -244,7 +236,7 @@ def get_activity_id(self, wf: Workflow, informed_by: str): # from the jobs collection for a given workflow. Also activity should be execution to conform # to the new schema. 
@lru_cache(maxsize=128) - def get_existing_jobs(self, wf: Workflow): + def get_existing_jobs(self, wf: WorkflowConfig): existing_jobs = set() # Filter by git_repo and version # Find all existing jobs for this workflow @@ -257,7 +249,7 @@ def get_existing_jobs(self, wf: Workflow): return existing_jobs # TODO: Rename this to reflect what it does and add unit tests - def find_new_jobs(self, act: Activity) -> list[Job]: + def find_new_jobs(self, wfp_node: WorkflowProcessNode) -> list[SchedulerJob]: """ For a given activity see if there are any new jobs that should be created. @@ -265,24 +257,25 @@ def find_new_jobs(self, act: Activity) -> list[Job]: new_jobs = [] # Loop over the derived workflows for this # activities' workflow - for wf in act.workflow.children: + for wf in wfp_node.workflow.children: # Ignore disabled workflows if not wf.enabled: continue # See if we already have a job for this - if act.id in self.get_existing_jobs(wf): + existing_jobs = self.get_existing_jobs(wf) + if wfp_node.id in self.get_existing_jobs(wf): continue # Look at previously generated derived # activities to see if this is already done. - for child_act in act.children: + for child_act in wfp_node.children: if within_range(child_act.workflow, wf, force=self.force): break else: # These means no existing activities were # found that matched this workflow, so we # add a job - logging.debug(f"Creating a job {wf.name}:{wf.version} for {act.id}") - new_jobs.append(Job(wf, act)) + logging.debug(f"Creating a job {wf.name}:{wf.version} for {wfp_node.id}") + new_jobs.append(SchedulerJob(wf, wfp_node)) return new_jobs @@ -296,18 +289,18 @@ def cycle(self, dryrun: bool = False, skiplist: set = set(), filt = {"was_informed_by": {"$in": list(allowlist)}} # TODO: Quite a lot happens under the hood here. This function should be broken down into smaller # functions to improve readability and maintainability. 
- acts = load_activities(self.db, self.workflows, allowlist) + wfp_nodes = load_workflow_process_nodes(self.db, self.workflows, allowlist) self.get_existing_jobs.cache_clear() job_recs = [] - for act in acts: - if act.was_informed_by in skiplist: - logging.debug(f"Skipping: {act.was_informed_by}") + for wfp_node in wfp_nodes: + if wfp_node.was_informed_by in skiplist: + logging.debug(f"Skipping: {wfp_node.was_informed_by}") continue - if not act.workflow.enabled: - logging.debug(f"Skipping: {act.id}, workflow disabled.") + if not wfp_node.workflow.enabled: + logging.debug(f"Skipping: {wfp_node.id}, workflow disabled.") continue - jobs = self.find_new_jobs(act) + jobs = self.find_new_jobs(wfp_node) for job in jobs: if dryrun: msg = f"new job: informed_by: {job.informed_by} trigger: {job.trigger_id} " @@ -315,7 +308,8 @@ def cycle(self, dryrun: bool = False, skiplist: set = set(), logging.info(msg) continue try: - jr = self.add_job_rec(job) + jr = self.create_job_rec(job) + self.db.jobs.insert_one(jr) if jr: job_recs.append(jr) except Exception as ex: diff --git a/nmdc_automation/workflow_automation/watch_nmdc.py b/nmdc_automation/workflow_automation/watch_nmdc.py index 30ba0811..5894df91 100644 --- a/nmdc_automation/workflow_automation/watch_nmdc.py +++ b/nmdc_automation/workflow_automation/watch_nmdc.py @@ -6,277 +6,298 @@ import logging import shutil from json import loads -from os.path import exists +from pathlib import Path +from typing import List, Dict, Any, Optional, Union, Tuple + +from nmdc_schema.nmdc import Database from nmdc_automation.api import NmdcRuntimeApi -from nmdc_automation.config import Config -from .wfutils import WorkflowJob as wfjob -from .wfutils import NmdcSchema, _md5 +from nmdc_automation.config import SiteConfig +from .wfutils import WorkflowJob +from .wfutils import _md5 -logger = logging.getLogger(__name__) -# TODO: Berkley refactoring: -# The watcher interacts with the NMDC runtime API to find / claim jobs and to post the resulting 
-# data objects back to the NMDC database. It interacts with the operations endpoint to update the status -# after job completion. -# Ensure that these calls to the Berkeley API are compatible. - -# TODO: Rename to distinguish between WorkflowJob instances and not other types such as the Job class in sched.py and -# the jobs API endpoint and DB collection. -# TODO: Add type hints to all methods. -# TODO: Add docstrings to all public methods. -# TODO: This has a "Long Method Chain" code smell and Deep Nesting code smell. Refactor to reduce complexity. -class Watcher: - def __init__(self, site_configuration_file): - self._POLL = 20 - self._MAX_FAILS = 2 - self.should_skip_claim = False - self.config = Config(site_configuration_file) - self.client_id = self.config.client_id - self.client_secret = self.config.client_secret - # TODO: Is there some reason to rename this variable? Also it doesn't seem to be used. - self.cromurl = self.config.cromwell_url - self.state_file = self.config.agent_state - self.stage_dir = self.config.stage_dir - self.raw_dir = self.config.raw_dir - # TODO: make it clear that this is a list of WorkflowJob instances - self.jobs = [] - self.runtime_api = NmdcRuntimeApi(site_configuration_file) - self._ALLOWED = self.config.allowed_workflows - - # TODO: Why not name this method "restore_from_checkpoint"? 
- def restore(self, nocheck: bool = False): - """ - Restore from checkpoint - """ - # TODO: Give a better name to the variable - data is too generic - data = self._load_state_file() - if not data: - return +DEFAULT_STATE_DIR = Path(__file__).parent / "_state" +DEFAULT_STATE_FILE = DEFAULT_STATE_DIR / "state.json" +INITIAL_STATE = {"jobs": []} +logger = logging.getLogger(__name__) - self.jobs = self._find_jobs(data, nocheck) - def _load_state_file(self): - if not exists(self.state_file): - return +class FileHandler: + """ FileHandler class for managing state and metadata files """ + def __init__(self, config: SiteConfig, state_file: Union[str, Path] = None): + """ Initialize the FileHandler, with a Config object and an optional state file path """ + self.config = config + self._state_file = None + # set state file + if state_file: + self._state_file = Path(state_file) + elif self.config.agent_state: + self._state_file = Path(self.config.agent_state) + else: + # no state file provided or set in config set up a default + # check for a default state directory and create if it doesn't exist + DEFAULT_STATE_DIR.mkdir(parents=True, exist_ok=True) + DEFAULT_STATE_FILE.touch(exist_ok=True) + # if the file is empty write the initial state + if DEFAULT_STATE_FILE.stat().st_size == 0: + with open(DEFAULT_STATE_FILE, "w") as f: + json.dump(INITIAL_STATE, f, indent=2) + self._state_file = DEFAULT_STATE_FILE + + @property + def state_file(self) -> Path: + """ Get the state file path """ + return self._state_file + + @state_file.setter + def state_file(self, value) -> None: + """ Set the state file path """ + self._state_file = value + + def read_state(self)-> Optional[Dict[str, Any]]: + """ Read the state file and return the data """ with open(self.state_file, "r") as f: - return loads(f.read()) + state = loads(f.read()) + return state - # TODO: 'job' is too generic - def _find_jobs(self, data: dict, nocheck: bool): - new_job_list = [] - seen = {} - # TODO: Be explicit about the 
type of the data["jobs"] list + def write_state(self, data) -> None: + """ Write data to the state file """ + # normalize "id" used in database job records to "nmdc_jobid" for job in data["jobs"]: - job_id = job["nmdc_jobid"] - if job_id in seen: - continue - job_record = wfjob(self.config, state=job, nocheck=nocheck) - new_job_list.append(job_record) - seen[job_id] = True + if "id" in job: + job["nmdc_jobid"] = job.pop("id") + with open(self.state_file, "w") as f: + json.dump(data, f, indent=2) + + def get_output_path(self, job: WorkflowJob) -> Path: + """ Get the output path for a job """ + # construct path from string components + output_path = Path(self.config.data_dir) / job.was_informed_by / job.workflow_execution_id + return output_path + + def write_metadata_if_not_exists(self, job: WorkflowJob)->Path: + """ Write metadata to a file if it doesn't exist """ + metadata_filepath = self.get_output_path(job) / "metadata.json" + # make sure the parent directories exist + metadata_filepath.parent.mkdir(parents=True, exist_ok=True) + if not metadata_filepath.exists(): + with open(metadata_filepath, "w") as f: + json.dump(job.job.metadata, f) + return metadata_filepath + + +class JobManager: + """ JobManager class for managing WorkflowJob objects """ + def __init__(self, config: SiteConfig, file_handler: FileHandler, init_cache: bool = True): + """ Initialize the JobManager with a Config object and a FileHandler object """ + self.config = config + self.file_handler = file_handler + self._job_cache = [] + self._MAX_FAILS = 2 + if init_cache: + self.restore_from_state() + - return new_job_list + @property + def job_cache(self)-> List[WorkflowJob]: + """ Get the job cache """ + return self._job_cache - ################################# + @job_cache.setter + def job_cache(self, value) -> None: + """ Set the job cache """ + self._job_cache = value - def job_checkpoint(self): - jobs = [job.get_state() for job in self.jobs] + def job_checkpoint(self) -> Dict[str, Any]: + 
""" Get the state data for all jobs """ + jobs = [wfjob.workflow.state for wfjob in self.job_cache] data = {"jobs": jobs} - with open(self.state_file, "w") as f: - json.dump(data, f, indent=2) + return data + + def save_checkpoint(self) -> None: + """ Save jobs to state data """ + data = self.job_checkpoint() + self.file_handler.write_state(data) + + def restore_from_state(self)-> None: + """ Restore jobs from state data """ + self.job_cache = self.get_workflow_jobs_from_state() + + def get_workflow_jobs_from_state(self)-> List[WorkflowJob]: + """ Find jobs from state data """ + wf_job_list = [] + job_cache_ids = [job.opid for job in self.job_cache] + state = self.file_handler.read_state() + for job in state["jobs"]: + if job.get("opid") in job_cache_ids: + continue + wf_job = WorkflowJob(self.config, workflow_state=job) + job_cache_ids.append(wf_job.opid) + wf_job_list.append(wf_job) + return wf_job_list - def cycle(self): - self.restore() - if not self.should_skip_claim: - self.claim_jobs() - self.check_status() - def watch(self): - logger.info("Entering polling loop") - while True: - try: - self.cycle() - except (IOError, ValueError, TypeError, AttributeError) as e: - logger.exception(f"Error occurred during cycle: {e}", exc_info=True) - sleep(self._POLL) + def find_job_by_opid(self, opid) -> Optional[WorkflowJob]: + """ Find a job by operation id """ + return next((job for job in self.job_cache if job.opid == opid), None) - def find_job_by_opid(self, opid): - return next((job for job in self.jobs if job.opid == opid), None) - def submit(self, new_job, opid, force=False): - common_workflow_id = new_job["workflow"]["id"] - if "object_id_latest" in new_job["config"]: + def prepare_and_cache_new_job(self, new_job: WorkflowJob, opid: str, force=False)-> Optional[WorkflowJob]: + """ Prepare and cache a new job """ + if "object_id_latest" in new_job.workflow.config: logger.warning("Old record. 
Skipping.") return - self.create_or_use_existing_job(new_job, opid, common_workflow_id) - self.jobs[-1].cromwell_submit(force=force) - - def create_or_use_existing_job(self, new_job, opid, common_workflow_id): - job = self.find_job_by_opid(opid) - if job: - logger.debug("Previously cached job") - logger.info(f"Reusing activity {job.activity_id}") - self.jobs.append(job) - else: - logging.debug("NEW JOB") - logging.debug(new_job) - job = wfjob( - site_config=self.config, - typ=common_workflow_id, - nmdc_jobid=new_job["id"], - workflow_config=new_job["config"], - opid=opid, - activity_id=new_job["config"]["activity_id"], - ) - self.jobs.append(job) + existing_job = self.find_job_by_opid(opid) + if not existing_job: + new_job.set_opid(opid, force=force) + new_job.done = False + self.job_cache.append(new_job) + return new_job + elif force: + self.job_cache.remove(existing_job) + new_job.set_opid(opid, force=force) + new_job.done = False + self.job_cache.append(new_job) + return new_job + + + + def get_finished_jobs(self)->Tuple[List[WorkflowJob], List[WorkflowJob]]: + """ Get finished jobs """ + successful_jobs = [] + failed_jobs = [] + for job in self.job_cache: + if job.done: + status = job.job_status + if status == "Succeeded" and job.opid: + successful_jobs.append(job) + elif status == "Failed" and job.opid: + failed_jobs.append(job) + return (successful_jobs, failed_jobs) - def refresh_remote_jobs(self): - """ - Return a filtered list of nmdc jobs. 
- """ + + def process_successful_job(self, job: WorkflowJob) -> Database: + """ Process a successful job """ + logger.info(f"Running post for op {job.opid}") + + output_path = self.file_handler.get_output_path(job) + if not output_path.exists(): + output_path.mkdir(parents=True, exist_ok=True) + + database = Database() + + data_objects = job.make_data_objects(output_dir=output_path) + database.data_object_set = data_objects + workflow_execution_record = job.make_workflow_execution_record(data_objects) + database.workflow_execution_set = [workflow_execution_record] + + self.file_handler.write_metadata_if_not_exists(job) + return database + + + def process_failed_job(self, job: WorkflowJob) -> None: + """ Process a failed job """ + if job.workflow.state.get("failed_count", 0) >= self._MAX_FAILS: + logger.error(f"Job {job.opid} failed {self._MAX_FAILS} times. Skipping.") + return + job.workflow.state["failed_count"] = job.workflow.state.get("failed_count", 0) + 1 + job.workflow.state["last_status"] = job.job_status + self.save_checkpoint() + logger.error(f"Job {job.opid} failed {job.workflow.state['failed_count']} times. Retrying.") + job.job.submit_job() + + +class RuntimeApiHandler: + def __init__(self, config): + self.runtime_api = NmdcRuntimeApi(config) + self.config = config + + def claim_job(self, job_id): + return self.runtime_api.claim_job(job_id) + + def get_unclaimed_jobs(self, allowed_workflows)-> List[WorkflowJob]: + jobs = [] filt = { - "workflow.id": {"$in": self._ALLOWED}, + "workflow.id": {"$in": allowed_workflows}, "claims": {"$size": 0} } - logging.debug("Looking for jobs") - jobs = self.runtime_api.list_jobs(filt=filt) - logging.debug(f"Found {len(jobs)} jobs") - known = set(job.nmdc_jobid for job in self.jobs) - return [job for job in jobs if job["id"] not in known] - - # TODO: Pull the 'for job' logic up into the caller. 
- def claim_jobs(self): - for job in self.refresh_remote_jobs(): - job_id = job["id"] - if job.get("claims") and len(job.get("claims")) > 0: - continue - logger.debug(f"Trying to claim: {job_id}") - - # Claim job - claim = self.runtime_api.claim_job(job_id) - if not claim["claimed"]: - logger.debug(claim) - self.submit_and_checkpoint_job(job, claim["id"]) - else: - # Previously claimed - opid = claim["detail"]["id"] - logger.info("Previously claimed.") - self.submit_and_checkpoint_job(job, opid) - - def submit_and_checkpoint_job(self, job, opid): - self.submit(job, opid) - self.job_checkpoint() - - def _get_url(self, informed_by, act_id, fname): - root = self.config.url_root - return f"{root}/{informed_by}/{act_id}/{fname}" - - def _get_output_dir(self, informed_by, act_id): - data_directory = self.config.data_dir - outdir = os.path.join(data_directory, informed_by, act_id) - if not os.path.exists(outdir): - os.makedirs(outdir) - return outdir - - def post_job_done(self, job): - logger.info(f"Running post for op {job.opid}") - metadata = job.get_metadata() - informed_by = job.workflow_config["was_informed_by"] - act_id = job.activity_id - outdir = self._get_output_dir(informed_by, act_id) - schema = NmdcSchema() + job_records = self.runtime_api.list_jobs(filt=filt) + + for job in job_records: + jobs.append(WorkflowJob(self.config, workflow_state=job)) - output_ids = self.generate_data_objects( - job, metadata["outputs"], outdir, informed_by, act_id, schema - ) - activity_inputs = [dobj["id"] for dobj in job.input_data_objects] + return jobs - self.create_activity_record(job, act_id, activity_inputs, output_ids, schema) + def post_objects(self, database_obj): + return self.runtime_api.post_objects(database_obj) - self.write_metadata_if_not_exists(metadata, outdir) + def update_op(self, opid, done, meta): + return self.runtime_api.update_op(opid, done=done, meta=meta) - nmdc_database_obj = schema.get_database_object_dump() - nmdc_database_obj_dict = 
json.loads(nmdc_database_obj) - resp = self.runtime_api.post_objects(nmdc_database_obj_dict) - logger.info(f"Response: {resp}") - job.done = True - resp = self.runtime_api.update_op(job.opid, done=True, meta=metadata) +class Watcher: + def __init__(self, site_configuration_file: Union[str, Path], state_file: Union[str, Path] = None): + self._POLL = 20 + self._MAX_FAILS = 2 + self.should_skip_claim = False + self.config = SiteConfig(site_configuration_file) + self.file_handler = FileHandler(self.config, state_file) + self.runtime_api_handler = RuntimeApiHandler(self.config) + self.job_manager = JobManager(self.config, self.file_handler) - return resp + def restore_from_checkpoint(self, state_data: Dict[str, Any], nocheck: bool = False)-> None: + """ + Restore from checkpoint + """ + if state_data: + self.file_handler.write_state(state_data) + self.job_manager.restore_from_state() - def generate_data_objects(self, job, job_outs, outdir, informed_by, act_id, schema): - output_ids = [] - prefix = job.workflow_config["input_prefix"] - for product_record in job.outputs: - outkey = f"{prefix}.{product_record['output']}" - if outkey not in job_outs and product_record.get("optional"): - logging.debug(f"Ignoring optional missing output {outkey}") + def cycle(self): + self.restore_from_checkpoint() + if not self.should_skip_claim: + unclaimed_jobs = self.runtime_api_handler.get_unclaimed_jobs(self.config.allowed_workflows) + self.claim_jobs(unclaimed_jobs) + + successful_jobs, failed_jobs = self.job_manager.get_finished_jobs() + for job in successful_jobs: + job_database = self.job_manager.process_successful_job(job) + job_dict = json.loads(job_database.json(exclude_unset=True)) + + # post workflow execution and data objects to the runtime api + resp = self.runtime_api_handler.post_objects(job_dict) + if not resp.ok: + logger.error(f"Error posting objects: {resp}") continue - - full_name = job_outs[outkey] - file_name = os.path.basename(full_name) - new_path = 
os.path.join(outdir, file_name) - shutil.copyfile(full_name, new_path) - - md5 = _md5(full_name) - file_url = self._get_url(informed_by, act_id, file_name) - id = product_record["id"] - schema.make_data_object( - name=file_name, - full_file_name=full_name, - file_url=file_url, - data_object_type=product_record["data_object_type"], - dobj_id=product_record["id"], - md5_sum=md5, - description=product_record["description"], - omics_id=act_id, + job.done = True + # update the operation record + resp = self.runtime_api_handler.update_op( + job.opid, done=True, meta=job.job.metadata ) + if not resp.ok: + logger.error(f"Error updating operation: {resp}") + continue - output_ids.append(id) - - return output_ids - - def create_activity_record(self, job, act_id, activity_inputs, output_ids, schema): - activity_type = job.activity_templ["type"] - name = job.activity_templ["name"].replace("{id}", act_id) - omic_id = job.workflow_config["was_informed_by"] - resource = self.config.resource - schema.create_activity_record( - activity_record=activity_type, - activity_name=name, - workflow=job.workflow_config, - activity_id=act_id, - resource=resource, - has_inputs_list=activity_inputs, - has_output_list=output_ids, - omic_id=omic_id, - start_time=job.start, - end_time=job.end, - ) - - def write_metadata_if_not_exists(self, metadata, outdir): - metadata_filepath = os.path.join(outdir, "metadata.json") - if not os.path.exists(metadata_filepath): - with open(metadata_filepath, "w") as f: - json.dump(metadata, f) - - def check_status(self): - for job in self.jobs: - if not job.done: - status = job.check_status() - if status == "Succeeded" and job.opid: - self.process_successful_job(job) - elif status == "Failed" and job.opid: - self.process_failed_job(job) + for job in failed_jobs: + self.job_manager.process_failed_job(job) - self.job_checkpoint() + def watch(self): + logger.info("Entering polling loop") + while True: + try: + self.cycle() + except (IOError, ValueError, TypeError, 
AttributeError) as e: + logger.exception(f"Error occurred during cycle: {e}", exc_info=True) + sleep(self._POLL) - # TODO: DRY up both of these methods into a single method that takes a status argument. - def process_successful_job(self, job): - self.post_job_done(job) - def process_failed_job(self, job): - if job.failed_count < self._MAX_FAILS: - job.failed_count += 1 - job.cromwell_submit() + def claim_jobs(self, unclaimed_jobs: List[WorkflowJob] = None): + # unclaimed_jobs = self.runtime_api_handler.get_unclaimed_jobs(self.config.allowed_workflows) + for job in unclaimed_jobs: + claim = self.runtime_api_handler.claim_job(job.workflow.nmdc_jobid) + opid = claim["detail"]["id"] + new_job = self.job_manager.prepare_and_cache_new_job(job, opid) + if new_job: + new_job.job.submit_job() + self.file_handler.write_state(self.job_manager.job_checkpoint()) diff --git a/nmdc_automation/workflow_automation/wfutils.py b/nmdc_automation/workflow_automation/wfutils.py index 8a91d8e4..86c67912 100755 --- a/nmdc_automation/workflow_automation/wfutils.py +++ b/nmdc_automation/workflow_automation/wfutils.py @@ -1,399 +1,546 @@ #!/usr/bin/env python -import os +import hashlib import json -import tempfile -import requests -import nmdc_schema.nmdc as nmdc import logging -import datetime +import os +import re +import shutil +import tempfile +from abc import ABC, abstractmethod +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional, Union + import pytz -import hashlib -from linkml_runtime.dumpers import json_dumper - -# TODO: Berkley refactoring: -# The NmdcSchema class - responsible for creating workflow and data object records -# to be inserted into the NMDC database will need to be updated to generate Berkley-compatible -# Datageneration and WorkflowExecution records. - -# TODO: Rename this class to something more descriptive - it runs and monitors workflows running in Cromwell -# via the Cromwell REST API. 
-# Consider renaming to CromwellWorkflowRunner. -# Consider generalizing the class to be able to submit and monitor workflows to other workflow engines e.g. JAWS. -# TODO: Add type hints to all methods, add docstrings to all methods. -# TODO: Rename the package to something more descriptive - it is responsible for running and monitoring workflows. -class WorkflowJob: - DEFAULT_STATUS = "Unsubmitted" - SUCCESS_STATUS = "Succeeded" - METADATA_URL_SUFFIX = "/metadata" - LABEL_SUBMITTER_VALUE = "nmdcda" - LABEL_PARAMETERS = ["release", "wdl", "git_repo"] - CHUNK_SIZE = 1000000 # 1 MB - GIT_RELEASES_PATH = "/releases/download" +import requests - debug = False - dryrun = False - options = None - activity_templ = None - outputs = None - input_data_objects = [] - start = None - end = None - - def __init__( - self, - site_config, - typ=None, - workflow_config=None, - nmdc_jobid=None, - opid=None, - activity_id=None, - state=None, - nocheck=False, - ): - self.config = site_config - self.workflow_config = workflow_config - self.set_config_attributes() - if workflow_config: - self.load_workflow_config() - self.set_initial_state(state, activity_id, typ, nmdc_jobid, opid) - if self.jobid and not nocheck: - self.check_status() - - def set_config_attributes(self): - # TODO: Why are we not using the config object directly? This is a code smell. - # Consider wrapping with @property decorators to make this more explicit. 
- self.cromurl = self.config.cromwell_url - self.data_dir = self.config.data_dir - self.resource = self.config.resource - self.url_root = self.config.url_root - - # TODO: These could be @property decorators - def load_workflow_config(self): - self.outputs = self.workflow_config.get("outputs") - self.activity_templ = self.workflow_config.get("activity") - self.input_data_objects = self.workflow_config.get("input_data_objects") - - def set_initial_state(self, state, activity_id, typ, nmdc_jobid, opid): - if state: - self.load_state_from_dict(state) - else: - self.set_default_state(activity_id, typ, nmdc_jobid, opid) - - def load_state_from_dict(self, state): - self.activity_id = state["activity_id"] - self.nmdc_jobid = state["nmdc_jobid"] - self.opid = state.get("opid", None) - self.type = state["type"] - self.workflow_config = state["conf"] - self.jobid = state["cromwell_jobid"] - self.last_status = state["last_status"] - self.failed_count = state.get("failed_count", 0) - self.done = state.get("done", None) - self.start = state.get("start") - self.end = state.get("end") - self.load_workflow_config() - - def set_default_state(self, activity_id, typ, nmdc_jobid, opid): - self.activity_id = activity_id - # TODO why? 
- self.type = typ - self.nmdc_jobid = nmdc_jobid - self.opid = opid - self.done = None - self.jobid = None - self.failed_count = 0 - self.last_status = self.DEFAULT_STATUS - - def get_state(self): - data = { - "type": self.type, - "cromwell_jobid": self.jobid, - "nmdc_jobid": self.nmdc_jobid, - "conf": self.workflow_config, - "activity_id": self.activity_id, - "last_status": self.last_status, - "done": self.done, - "failed_count": self.failed_count, - "start": self.start, - "end": self.end, - "opid": self.opid, - } - return data - - def check_status(self): - """ - Check the status in Cromwell - """ - if not self.jobid: - self.last_status = "Unsubmitted" - return self.last_status +from nmdc_automation.config import SiteConfig +from nmdc_automation.workflow_automation.models import DataObject - url = f"{self.cromurl}/{self.jobid}/status" +DEFAULT_MAX_RETRIES = 2 - try: - resp = requests.get(url) - resp.raise_for_status() - except requests.exceptions.RequestException as ex: - # logging.error(f"Error checking status: {ex}") - self.last_status = "Error" - return self.last_status - data = resp.json() - # TODO: Why not name this variable 'status'? 
- state = data.get("status", "Unknown") - self.last_status = state +class JobRunnerABC(ABC): + """Abstract base class for job runners""" + + @abstractmethod + def submit_job(self) -> str: + """ Submit a job """ + pass + + @abstractmethod + def get_job_status(self) -> str: + """ Get the status of a job """ + pass + + @abstractmethod + def get_job_metadata(self) -> Dict[str, Any]: + """ Get metadata for a job """ + pass + + @property + @abstractmethod + def job_id(self) -> Optional[str]: + """ Get the job id """ + pass + + @property + @abstractmethod + def outputs(self) -> Dict[str, str]: + """ Get the outputs """ + pass - if state == "Succeeded" and not self.end: - self.end = datetime.datetime.now(pytz.utc).isoformat() + @property + @abstractmethod + def metadata(self) -> Dict[str, Any]: + """ Get the metadata """ + pass - return state + @property + @abstractmethod + def max_retries(self) -> int: + """ Get the maximum number of retries """ + pass - def get_metadata(self): + +class CromwellRunner(JobRunnerABC): + """Job runner for Cromwell""" + LABEL_SUBMITTER_VALUE = "nmdcda" + LABEL_PARAMETERS = ["release", "wdl", "git_repo"] + NO_SUBMIT_STATES = ["Submitted", # job is already submitted but not running + "Running", # job is already running + "Failed", # job failed + "Succeeded", # job succeeded + "Aborted", # job was aborted and did not finish + "Aborting" # job is in the process of being aborted + "On Hold", # job is on hold and not running. It can be manually resumed later + ] + + def __init__(self, site_config: SiteConfig, workflow: "WorkflowStateManager", job_metadata: Dict[str, Any] = None, + max_retries: int = DEFAULT_MAX_RETRIES, dry_run: bool = False) -> None: """ - Check the status in Cromwell + Create a Cromwell job runner. 
+ :param site_config: SiteConfig object + :param workflow: WorkflowStateManager object + :param job_metadata: metadata for the job + :param max_retries: maximum number of retries for a job + :param dry_run: if True, do not submit the job """ - if not self.jobid: - return self.DEFAULT_STATUS - url = f"{self.cromurl}/{self.jobid}{self.METADATA_URL_SUFFIX}" - resp = requests.get(url) - resp.raise_for_status() - return resp.json() - - def json_log(self, data, title="json_log"): - logging.debug(title) - logging.debug(json.dumps(data, indent=2)) - - def _generate_inputs(self): + self.config = site_config + if not isinstance(workflow, WorkflowStateManager): + raise ValueError("workflow must be a WorkflowStateManager object") + self.workflow = workflow + self.service_url = self.config.cromwell_url + self._metadata = {} + if job_metadata: + self._metadata = job_metadata + self._max_retries = max_retries + self.dry_run = dry_run + + def _generate_workflow_inputs(self) -> Dict[str, str]: + """ Generate inputs for the job runner from the workflow state """ inputs = {} - prefix = self.workflow_config["input_prefix"] - for input, input_object in self.workflow_config["inputs"].items(): - input_prefix = f"{prefix}.{input}" - if input_object == "{resource}": - input_object = self.config.resource - inputs[input_prefix] = input_object + prefix = self.workflow.input_prefix + for input_key, input_val in self.workflow.inputs.items(): + # special case for resource + if input_val == "{resource}": + input_val = self.config.resource + inputs[f"{prefix}.{input_key}"] = input_val return inputs - def _generate_labels(self): - labels = self.get_label_parameters() - labels["pipeline_version"] = labels["release"] - labels["pipeline"] = labels["wdl"] - labels["activity_id"] = self.activity_id - labels["opid"] = self.opid + def _generate_workflow_labels(self) -> Dict[str, str]: + """ Generate labels for the job runner from the workflow state """ + labels = {param: self.workflow.config[param] for 
param in self.LABEL_PARAMETERS} labels["submitter"] = self.LABEL_SUBMITTER_VALUE + # some Cromwell-specific labels + labels["pipeline_version"] = self.workflow.config["release"] + labels["pipeline"] = self.workflow.config["wdl"] + labels["activity_id"] = self.workflow.workflow_execution_id + labels["opid"] = self.workflow.opid return labels - def get_label_parameters(self): - return {param: self.workflow_config[param] for param in self.LABEL_PARAMETERS} - - def fetch_release_file(self, fn, suffix=None): - release = self.workflow_config["release"] - base_url = self.workflow_config["git_repo"].rstrip("/") - url = base_url + f"{self.GIT_RELEASES_PATH}/{release}/{fn}" - - logging.debug(f"BASE URL: {base_url}") - logging.debug(f"URL: {url}") - - resp = requests.get(url, stream=True) - resp.raise_for_status() - - fp, fname = tempfile.mkstemp(suffix=suffix) + def generate_submission_files(self) -> Dict[str, Any]: + """ Generate the files needed for a Cromwell job submission """ + files = {} try: - with os.fdopen(fp, "wb") as fd: - for chunk in resp.iter_content(chunk_size=self.CHUNK_SIZE): - fd.write(chunk) - except Exception as ex: - os.unlink(fname) - raise ex - - return fname - - def generate_files(self, conf): - wdl_file = self.fetch_release_file(conf["wdl"], suffix=".wdl") - bundle_file = self.fetch_release_file("bundle.zip", suffix=".zip") - files = { - "workflowSource": open(wdl_file), - "workflowDependencies": open(bundle_file, "rb"), - "workflowInputs": open(_json_tmp(self._generate_inputs())), - "labels": open(_json_tmp(self._generate_labels())), - } - if self.options: - files["workflowOptions"] = open(self.options) + wdl_file = self.workflow.fetch_release_file(self.workflow.config["wdl"], suffix=".wdl") + bundle_file = self.workflow.fetch_release_file("bundle.zip", suffix=".zip") + files = {"workflowSource": open(wdl_file, "rb"), "workflowDependencies": open(bundle_file, "rb"), + "workflowInputs": open(_json_tmp(self._generate_workflow_inputs()), "rb"), + 
"labels": open(_json_tmp(self._generate_workflow_labels()), "rb"), } + except Exception as e: + logging.error(f"Failed to generate submission files: {e}") + self._cleanup_files(list(files.values())) + raise e return files - def cromwell_submit(self, force=False): - # Refresh the log - status = self.check_status() - states = ["Failed", "Aborted", "Aborting", "Unsubmitted"] - if not force and status not in states: - logging.info("Skipping: %s %s" % (self.activity_id, status)) - return + def _cleanup_files(self, files: List[Union[tempfile.NamedTemporaryFile, tempfile.SpooledTemporaryFile]]): + """Safely closes and removes files.""" + for file in files: + try: + file.close() + os.unlink(file.name) + except Exception as e: + logging.error(f"Failed to cleanup file: {e}") - cleanup = [] - conf = self.workflow_config + def submit_job(self, force: bool = False) -> Optional[str]: + """ + Submit a job to Cromwell. Update the workflow state with the job id and status. + :param force: if True, submit the job even if it is in a state that does not require submission + :return: the job id + """ + status = self.get_job_status() + if status in self.NO_SUBMIT_STATES and not force: + logging.info(f"Job {self.job_id} in state {status}, skipping submission") + return + cleanup_files = [] try: - self.json_log(self._generate_inputs(), title="Inputs") - self.json_log(self._generate_labels(), title="Labels") - files = self.generate_files(conf) - cleanup.extend(files.values()) - - job_id = "unknown" - if not self.dryrun: - logging.debug(self.cromurl) - resp = requests.post(self.cromurl, data={}, files=files) - resp.raise_for_status() - data = resp.json() - self.json_log(data, title="Response") - job_id = data["id"] + files = self.generate_submission_files() + cleanup_files = list(files.values()) + if not self.dry_run: + response = requests.post(self.service_url, files=files) + response.raise_for_status() + self.metadata = response.json() + self.job_id = self.metadata["id"] + 
logging.info(f"Submitted job {self.job_id}") else: - job_id = "dryrun" - - logging.info(f"Submitted: {job_id}") - self.start = datetime.datetime.now(pytz.utc).isoformat() - self.jobid = job_id - self.done = False + logging.info(f"Dry run: skipping job submission") + self.job_id = "dry_run" + + logging.info(f"Job {self.job_id} submitted") + start_time = datetime.now(pytz.utc).isoformat() + # update workflow state + self.workflow.done = False + self.workflow.update_state({"start": start_time}) + self.workflow.update_state({"cromwell_jobid": self.job_id}) + self.workflow.update_state({"last_status": "Submitted"}) + return self.job_id + except Exception as e: + logging.error(f"Failed to submit job: {e}") + raise e finally: - for file in cleanup: - file.close() - os.unlink(file.name) + self._cleanup_files(cleanup_files) + + def get_job_status(self) -> str: + """ Get the status of a job from Cromwell """ + if not self.job_id: + return "Unknown" + status_url = f"{self.service_url}/{self.job_id}/status" + response = requests.get(status_url) + response.raise_for_status() + return response.json().get("status", "Unknown") + + def get_job_metadata(self) -> Dict[str, Any]: + """ Get metadata for a job from Cromwell """ + metadata_url = f"{self.service_url}/{self.job_id}/metadata" + response = requests.get(metadata_url) + response.raise_for_status() + metadata = response.json() + # update cached metadata + self.metadata = metadata + return metadata + + @property + def job_id(self) -> Optional[str]: + """ Get the job id from the metadata """ + return self.metadata.get("id", None) + + @job_id.setter + def job_id(self, job_id: str): + """ Set the job id in the metadata """ + self.metadata["id"] = job_id + + @property + def outputs(self) -> Dict[str, str]: + """ Get the outputs from the metadata """ + return self.metadata.get("outputs", {}) + + @property + def metadata(self) -> Dict[str, Any]: + """ Get the metadata """ + return self._metadata + + @metadata.setter + def 
metadata(self, metadata: Dict[str, Any]): + """ Set the metadata """ + self._metadata = metadata + + @property + def max_retries(self) -> int: + return self._max_retries + + +class WorkflowStateManager: + CHUNK_SIZE = 1000000 # 1 MB + GIT_RELEASES_PATH = "/releases/download" -# TODO: Rename this class to something descriptive - it is responsible for creating NMDC database objects - -# the existing name is already taken by the NMDC schema module. -# Consider renaming to NMDCDatabaseObjectCreator. -# Add type hints to all methods, add docstrings to all methods. -class NmdcSchema: - def __init__(self): - self.nmdc_db = nmdc.Database() - self._data_object_string = "nmdc:DataObject" - self.activity_store = self.activity_map() - - def make_data_object( - self, - name: str, - full_file_name: str, - file_url: str, - data_object_type: str, - dobj_id: str, - md5_sum: str, - description: str, - omics_id: str, - ) -> None: - """Create nmdc database data object - - Args: - name (str): name of data object - full_file_name (str): full file name - file_url (str): url for data object file - data_object_type (str): nmdc data object type - dobj_id (str): minted data object id - md5_sum (str): md5 check sum of data product - description (str): description for data object - omics_id (str): minted omics id + def __init__(self, state: Dict[str, Any] = None, opid: str = None): + if state is None: + state = {} + self.cached_state = state + if opid and "opid" in self.cached_state: + raise ValueError("opid already set in job state") + if opid: + self.cached_state["opid"] = opid + + def update_state(self, state: Dict[str, Any]): + self.cached_state.update(state) + + @property + def state(self) -> Dict[str, Any]: + return self.cached_state + + @property + def config(self) -> Dict[str, Any]: + # for backward compatibility we need to check for both keys + return self.cached_state.get("conf", self.cached_state.get("config", {})) + + @property + def execution_template(self) -> Dict[str, str]: + # 
for backward compatibility we need to check for both keys + return self.config.get("workflow_execution", self.config.get("activity", {})) + + @property + def workflow_execution_id(self) -> Optional[str]: + # for backward compatibility we need to check for both keys + return self.config.get("activity_id", self.config.get("workflow_execution_id", None)) + + @property + def was_informed_by(self) -> Optional[str]: + return self.config.get("was_informed_by", None) + + @property + def workflow_execution_type(self) -> Optional[str]: + return self.execution_template.get("type", None) + + @property + def workflow_execution_name(self) -> Optional[str]: + name_base = self.execution_template.get("name", None) + if name_base: + return name_base.replace("{id}", self.workflow_execution_id) + return None + + @property + def data_outputs(self) -> List[Dict[str, str]]: + return self.config.get("outputs", []) + + @property + def input_prefix(self) -> Optional[str]: + return self.config.get("input_prefix", None) + + @property + def inputs(self) -> Dict[str, str]: + return self.config.get("inputs", {}) + + @property + def nmdc_jobid(self) -> Optional[str]: + # different keys in state file vs database record + return self.cached_state.get("nmdc_jobid", self.cached_state.get("id", None)) + + @property + def job_runner_id(self) -> Optional[str]: + # for now we only have cromwell as a job runner + job_runner_ids = ["cromwell_jobid", ] + for job_runner_id in job_runner_ids: + if job_runner_id in self.cached_state: + return self.cached_state[job_runner_id] + + @property + def opid(self) -> Optional[str]: + return self.cached_state.get("opid", None) + + @opid.setter + def opid(self, opid: str): + if self.opid: + raise ValueError("opid already set in job state") + self.cached_state["opid"] = opid + + def fetch_release_file(self, filename: str, suffix: str = None) -> str: + """ + Download a release file from the Git repository and save it as a temporary file. 
+ Note: the temporary file is not deleted automatically. """ + url = self._build_release_url(filename) + logging.debug(f"Fetching release file from URL: {url}") + # download the file as a stream to handle large files + response = requests.get(url, stream=True) + try: + response.raise_for_status() + # create a named temporary file + with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_file: + self._write_stream_to_file(response, tmp_file) + return tmp_file.name + finally: + response.close() - self.nmdc_db.data_object_set.append( - nmdc.DataObject( - file_size_bytes=os.stat(full_file_name).st_size, - name=name, - url=file_url, - data_object_type=data_object_type, - type=self._data_object_string, - id=dobj_id, - md5_checksum=md5_sum, - description=description.replace("{id}", omics_id), - ) - ) - - def create_activity_record( - self, - activity_record, - activity_name, - workflow, - activity_id, - resource, - has_inputs_list, - has_output_list, - omic_id, - start_time, - end_time, - ): - database_activity_set = self.activity_store[activity_record][0] - - database_activity_range = self.activity_store[activity_record][1] - - database_activity_set.append( - database_activity_range( - id=activity_id, # call minter for activity type - name=activity_name, - git_url=workflow["git_repo"], - version=workflow["release"], - part_of=[omic_id], - execution_resource=resource, - started_at_time=start_time, - has_input=has_inputs_list, - has_output=has_output_list, - type=activity_record, - ended_at_time=end_time, - was_informed_by=omic_id, - ) - ) - - def activity_map(self): + def _build_release_url(self, filename: str) -> str: + """Build the URL for a release file in the Git repository.""" + release = self.config["release"] + base_url = self.config["git_repo"].rstrip("/") + url = f"{base_url}{self.GIT_RELEASES_PATH}/{release}/{filename}" + + def _write_stream_to_file(self, response: requests.Response, file: tempfile.NamedTemporaryFile) -> None: + """Write a stream 
from a requests response to a file.""" + try: + for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE): + if chunk: + file.write(chunk) + file.flush() + except Exception as e: + # clean up the temporary file + Path(file.name).unlink(missing_ok=True) + logging.error(f"Error writing stream to file: {e}") + raise e + + +class WorkflowJob: + """ + A class to manage a Workflow's job state and execution, including submission, status, and output. A WorkflowJob + combines a SiteConfig object, a WorkflowStateManager object, and a JobRunner object to manage the job state and + execution, and to propagate job results back to the workflow state and ultimately to the database. + A WorkflowJob object is created with: + - a SiteConfig object + - a workflow state dictionary + - a job metadata dictionary + - an optional operation id (opid) + - an optional JobRunnerABC object (default is CromwellRunner) + + + """ + def __init__(self, site_config: SiteConfig, workflow_state: Dict[str, Any] = None, + job_metadata: Dict['str', Any] = None, opid: str = None, job_runner: JobRunnerABC = None) -> None: + self.site_config = site_config + self.workflow = WorkflowStateManager(workflow_state, opid) + # default to CromwellRunner if no job_runner is provided + if job_runner is None: + job_runner = CromwellRunner(site_config, self.workflow, job_metadata) + self.job = job_runner + + # Properties to access the site config, job state, and job runner attributes + @property + def opid(self) -> Optional[str]: + """ Get the operation id """ + return self.workflow.state.get("opid", None) + + def set_opid(self, opid: str, force: bool = False): + """ Set the operation id """ + if self.opid and not force: + raise ValueError("opid already set in job state") + self.workflow.update_state({"opid": opid}) + + @property + def done(self) -> Optional[bool]: + """ Get the done state of the job """ + return self.workflow.state.get("done", None) + + @done.setter + def done(self, done: bool): + """ Set the done 
state of the job """ + self.workflow.update_state({"done": done}) + + @property + def job_status(self) -> str: """ - Inform Object Mapping Process what activies need to be imported and - distrubuted across the process + Get the status of the job. If the job has not been submitted, return "Unsubmitted". + If the job has failed and the number of retries has been exceeded, return "Failed". + Otherwise, return the status from the job runner. """ - - activity_store_dict = { - "nmdc:MetagenomeSequencing": ( - self.nmdc_db.metagenome_sequencing_activity_set, - nmdc.MetagenomeSequencingActivity, - ), - "nmdc:ReadQcAnalysisActivity": ( - self.nmdc_db.read_qc_analysis_activity_set, - nmdc.ReadQcAnalysisActivity, - ), - "nmdc:ReadBasedTaxonomyAnalysisActivity": ( - self.nmdc_db.read_based_taxonomy_analysis_activity_set, - nmdc.ReadBasedTaxonomyAnalysisActivity, - ), - "nmdc:MetagenomeAssembly": ( - self.nmdc_db.metagenome_assembly_set, - nmdc.MetagenomeAssembly, - ), - "nmdc:MetatranscriptomeAssembly": ( - self.nmdc_db.metatranscriptome_assembly_set, - nmdc.MetatranscriptomeAssembly, - ), - "nmdc:MetagenomeAnnotationActivity": ( - self.nmdc_db.metagenome_annotation_activity_set, - nmdc.MetagenomeAnnotationActivity, - ), - "nmdc:MetatranscriptomeAnnotationActivity": ( - self.nmdc_db.metatranscriptome_annotation_set, - nmdc.MetatranscriptomeAnnotationActivity, - ), - "nmdc:MetatranscriptomeExpressionAnalysis": ( - self.nmdc_db.metatranscriptome_expression_analysis_set, - nmdc.MetatranscriptomeExpressionAnalysis, - ), - "nmdc:MagsAnalysisActivity": ( - self.nmdc_db.mags_activity_set, - nmdc.MagsAnalysisActivity, - ), - } - - return activity_store_dict - - def get_database_object_dump(self): + status = None + # extend this list as needed for other job runners + job_id_keys = ["cromwell_jobid"] + failed_count = self.workflow.state.get("failed_count", 0) + # if none of the job id keys are in the workflow state, it is unsubmitted + if not any(key in self.workflow.state for key in 
job_id_keys): + status = "Unsubmitted" + self.workflow.update_state({"last_status": status}) + elif self.workflow.state.get("last_status") == "Succeeded": + status = "Succeeded" + elif self.workflow.state.get("last_status") == "Failed" and failed_count >= self.job.max_retries: + status = "Failed" + else: + status = self.job.get_job_status() + self.workflow.update_state({"last_status": status}) + return status + + @property + def workflow_execution_id(self) -> Optional[str]: + """ Get the workflow execution id """ + return self.workflow.workflow_execution_id + + @property + def data_dir(self) -> str: + """ Get the data directory """ + return self.site_config.data_dir + + @property + def execution_resource(self) -> str: + """ Get the execution resource (e.g., NERSC-Perlmutter) """ + return self.site_config.resource + + @property + def url_root(self) -> str: + """ Get the URL root """ + return self.site_config.url_root + + @property + def was_informed_by(self) -> str: + """ get the was_informed_by ID value """ + return self.workflow.was_informed_by + + @property + def as_workflow_execution_dict(self) -> Dict[str, Any]: + """ + Create a dictionary representation of the basic workflow execution attributes for a WorkflowJob. 
+ """ + # for forward compatibility we need to strip Activity from the type + normalized_type = self.workflow.workflow_execution_type.replace("Activity", "") + base_dict = {"id": self.workflow_execution_id, "type": normalized_type, + "name": self.workflow.workflow_execution_name, "git_url": self.workflow.config["git_repo"], + "execution_resource": self.execution_resource, "was_informed_by": self.was_informed_by, + "has_input": [dobj["id"] for dobj in self.workflow.config["input_data_objects"]], + "started_at_time": self.workflow.state.get("start"), "ended_at_time": self.workflow.state.get("end"), + "version": self.workflow.config["release"], } + return base_dict + + def make_data_objects(self, output_dir: Union[str, Path] = None) -> List[DataObject]: + """ + Create DataObject objects for each output of the job. """ - Get the NMDC database object. - Returns: - nmdc.Database: NMDC database object. + data_objects = [] + + for output_spec in self.workflow.data_outputs: # specs are defined in the workflow.yaml file under Outputs + output_key = f"{self.workflow.input_prefix}.{output_spec['output']}" + if output_key not in self.job.outputs: + if output_spec.get("optional"): + logging.debug(f"Optional output {output_key} not found in job outputs") + continue + else: + logging.warning(f"Required output {output_key} not found in job outputs") + continue + # get the full path to the output file from the job_runner + output_file_path = Path(self.job.outputs[output_key]) + + md5_sum = _md5(output_file_path) + file_url = f"{self.url_root}/{self.was_informed_by}/{self.workflow_execution_id}/{output_file_path.name}" + + # copy the file to the output directory if provided + new_output_file_path = None + if output_dir: + new_output_file_path = Path(output_dir) / output_file_path.name + # copy the file to the output directory + shutil.copy(output_file_path, new_output_file_path) + else: + logging.warning(f"Output directory not provided, not copying {output_file_path} to output 
directory") + + # create a DataObject object + data_object = DataObject( + id=output_spec["id"], name=output_file_path.name, type="nmdc:DataObject", url=file_url, + data_object_type=output_spec["data_object_type"], md5_checksum=md5_sum, + description=output_spec["description"], was_generated_by=self.workflow_execution_id, ) + + data_objects.append(data_object) + return data_objects + + def make_workflow_execution_record(self, data_objects: List[DataObject]) -> Dict[str, Any]: """ - nmdc_database_object = json_dumper.dumps(self.nmdc_db, inject_type=False) - return nmdc_database_object + Create a workflow execution record for the job. This record includes the basic workflow execution attributes + and the data objects generated by the job. Additional workflow-specific attributes can be defined in the + workflow execution template and read from a job's output files. + The data objects are added to the record as a list of IDs in the "has_output" key. + """ + wf_dict = self.as_workflow_execution_dict + wf_dict["has_output"] = [dobj.id for dobj in data_objects] + + # workflow-specific keys + logical_names = set() + field_names = set() + pattern = r'\{outputs\.(\w+)\.(\w+)\}' + for attr_key, attr_val in self.workflow.execution_template.items(): + if attr_val.startswith("{outputs."): + match = re.match(pattern, attr_val) + if not match: + logging.warning(f"Invalid output reference {attr_val}") + continue + logical_names.add(match.group(1)) + field_names.add(match.group(2)) + + for logical_name in logical_names: + output_key = f"{self.workflow.input_prefix}.{logical_name}" + data_path = self.job.outputs.get(output_key) + if data_path: + # read in as json + with open(data_path) as f: + data = json.load(f) + for field_name in field_names: + # add to wf_dict if it has a value + if field_name in data: + wf_dict[field_name] = data[field_name] + else: + logging.warning(f"Field {field_name} not found in {data_path}") + + return wf_dict def _json_tmp(data): @@ -403,9 +550,5 @@ def 
_json_tmp(data): return fname -def jprint(obj): - print(json.dumps(obj, indent=2)) - - def _md5(file): return hashlib.md5(open(file, "rb").read()).hexdigest() diff --git a/nmdc_automation/workflow_automation/workflow_process.py b/nmdc_automation/workflow_automation/workflow_process.py new file mode 100644 index 00000000..1e2b5382 --- /dev/null +++ b/nmdc_automation/workflow_automation/workflow_process.py @@ -0,0 +1,265 @@ +""" This module contains functions to load workflow process nodes from the database. """ +import logging +from functools import lru_cache +from typing import List, Dict + +from semver.version import Version + +from nmdc_automation.workflow_automation.models import WorkflowProcessNode, DataObject, WorkflowConfig + +warned_objects = set() + + +def get_required_data_objects_map(db, workflows: List[WorkflowConfig]) -> Dict[str, DataObject]: + """ + Search for all the data objects that are required data object types for the workflows, + and return a dictionary of data objects by ID. + + TODO: In the future this will probably need to be redone + since the number of data objects could get very large. + """ + + # Build up a filter of what types are used + required_types = set() + for wf in workflows: + required_types.update(set(wf.data_object_types)) + + required_data_objs_by_id = dict() + for rec in db.data_object_set.find(): + do = DataObject(**rec) + if do.data_object_type not in required_types: + continue + required_data_objs_by_id[do.id] = do + return required_data_objs_by_id + + +@lru_cache +def _within_range(ver1: str, ver2: str) -> bool: + """ + Determine if two workflows are within a major and minor + version of each other. 
+ """ + + def get_version(version): + v_string = version.lstrip("b").lstrip("v").rstrip("-beta") + return Version.parse(v_string) + + v1 = get_version(ver1) + v2 = get_version(ver2) + if v1.major == v2.major and v1.minor == v2.minor: + return True + return False + + +def _check(match_types, data_object_ids, data_objs): + """ + This iterates through a list of data objects and + checks the type against the match types. + """ + if not data_object_ids: + return False + if not match_types or len(match_types) == 0: + return True + match_set = set(match_types) + do_types = set() + for doid in data_object_ids: + if doid in data_objs: + do_types.add(data_objs[doid].data_object_type) + return match_set.issubset(do_types) + + +def _is_missing_required_input_output(wf: WorkflowConfig, rec: dict, data_objects_by_id: Dict[str, DataObject]) -> bool: + """ + Some workflows require specific inputs or outputs. This + implements the filtering for those. + """ + match_in = _check( + wf.filter_input_objects, rec.get("has_input"), data_objects_by_id + ) + match_out = _check( + wf.filter_output_objects, rec.get("has_output"), data_objects_by_id + ) + return not (match_in and match_out) + + +def get_current_workflow_process_nodes( + db, workflows: List[WorkflowConfig], + data_objects_by_id: Dict[str, DataObject], allowlist: List[str] = None) -> List[WorkflowProcessNode]: + """ + Fetch the relevant workflow process nodes for the given workflows. + 1. Get the Data Generation (formerly Omics Processing) records for the workflows by analyte category. + 2. Get the remaining Workflow Execution records that was_informed_by the Data Generation objects. + 3. Filter Workflow Execution records by: + - version (within range) if specified in the workflow + - input and output data objects required by the workflow + Returns a list of WorkflowProcessNode objects. 
+ """ + workflow_process_nodes = set() + analyte_category = _determine_analyte_category(workflows) + + data_generation_ids = set() + data_generation_workflows = [wf for wf in workflows if wf.collection == "data_generation_set"] + workflow_execution_workflows = [wf for wf in workflows if wf.collection == "workflow_execution_set"] + + # default query for data_generation_set records filtered by analyte category + q = {"analyte_category": analyte_category} + # override query with allowlist + if allowlist: + q["id"] = {"$in": list(allowlist)} + dg_execution_records = db["data_generation_set"].find(q) + dg_execution_records = list(dg_execution_records) + + for wf in data_generation_workflows: + # Sequencing workflows don't have a git repo + for rec in dg_execution_records: + if _is_missing_required_input_output(wf, rec, data_objects_by_id): + continue + data_generation_ids.add(rec["id"]) + wfp_node = WorkflowProcessNode(rec, wf) + workflow_process_nodes.add(wfp_node) + + for wf in workflow_execution_workflows: + q = {} + if wf.git_repo: + q = {"git_url": wf.git_repo} + # override query with allowlist + if allowlist: + q = {"was_informed_by": {"$in": list(allowlist)}} + + records = db[wf.collection].find(q) + for rec in records: + if wf.version and not _within_range(rec["version"], wf.version): + continue + if _is_missing_required_input_output(wf, rec, data_objects_by_id): + continue + if rec["was_informed_by"] in data_generation_ids: + wfp_node = WorkflowProcessNode(rec, wf) + workflow_process_nodes.add(wfp_node) + + return list(workflow_process_nodes) + + +def _determine_analyte_category(workflows: List[WorkflowConfig]) -> str: + analyte_categories = set([wf.analyte_category for wf in workflows]) + if len(analyte_categories) > 1: + raise ValueError("Multiple analyte categories not supported") + elif len(analyte_categories) == 0: + raise ValueError("No analyte category found") + analyte_category = analyte_categories.pop() + return analyte_category.lower() + + +# TODO: 
Make public, give a better name, add type hints and unit tests. +def _resolve_relationships(wfp_nodes: List[WorkflowProcessNode], wfp_nodes_by_data_object_id: Dict[str, WorkflowProcessNode]) -> List[WorkflowProcessNode]: + """ + Find the parents and children relationships + between the activities + """ + # We now have a list of all the activites and + # a map of all of the data objects they generated. + # Let's use this to find the parent activity + # for each child activity + for wfp_node in wfp_nodes: + logging.debug(f"Processing {wfp_node.id} {wfp_node.name} {wfp_node.workflow.name}") + wfp_node_predecessors = wfp_node.workflow.parents + if not wfp_node_predecessors: + logging.debug("- No Predecessors") + continue + # Go through its inputs + for do_id in wfp_node.has_input: + if do_id not in wfp_nodes_by_data_object_id: + # This really shouldn't happen + if do_id not in warned_objects: + logging.warning(f"Missing data object {do_id}") + warned_objects.add(do_id) + continue + parent_wfp_node = wfp_nodes_by_data_object_id[do_id] + # This is to cover the case where it was a duplicate. + # This shouldn't happen in the future. + if not parent_wfp_node: + logging.warning("Parent node is none") + continue + # Let's make sure these came from the same source + # This is just a safeguard + if wfp_node.was_informed_by != parent_wfp_node.was_informed_by: + logging.warning( + "Mismatched informed by for " + f"{do_id} in {wfp_node.id} " + f"{wfp_node.was_informed_by} != " + f"{parent_wfp_node.was_informed_by}" + ) + continue + # We only want to use it as a parent if it is the right + # parent workflow. 
Some inputs may come from ancestors + # further up + if parent_wfp_node.workflow in wfp_node_predecessors: + # This is the one + wfp_node.parent = parent_wfp_node + parent_wfp_node.children.append(wfp_node) + logging.debug( + f"Found parent: {parent_wfp_node.id}" + f" {parent_wfp_node.name}" + ) + break + if len(wfp_node.workflow.parents) > 0 and not wfp_node.parent: + if wfp_node.id not in warned_objects: + logging.warning(f"Didn't find a parent for {wfp_node.id}") + warned_objects.add(wfp_node.id) + # Now all the activities have their parent + return wfp_nodes + + +def _associate_workflow_process_nodes_to_data_objects(wfp_nodes: List[WorkflowProcessNode], data_objs_by_id): + """ + Associate the data objects with workflow process nodes + """ + wfp_nodes_by_data_object_id = dict() + for wfp_node in wfp_nodes: + for do_id in wfp_node.has_output: + if do_id in data_objs_by_id: + do = data_objs_by_id[do_id] + wfp_node.add_data_object(do) + # If its a dupe, set it to none + # so we can ignore it later. + # Once we re-id the data objects this + # Post re-id we would not expect thi + if do_id in wfp_nodes_by_data_object_id: + if do_id not in warned_objects: + logging.warning(f"Duplicate output object {do_id}") + warned_objects.add(do_id) + wfp_nodes_by_data_object_id[do_id] = None + else: + wfp_nodes_by_data_object_id[do_id] = wfp_node + return wfp_nodes_by_data_object_id, wfp_nodes + + + +def load_workflow_process_nodes(db, workflows: list[WorkflowConfig], allowlist: list[str] = None) -> List[WorkflowProcessNode]: + """ + This reads the activities from Mongo. It also + finds the parent and child relationships between + the activities using the has_output and has_input + to connect things. + + Finally, it creates a map of data objects by type + for each activity. + + Inputs: + db: mongo database + workflow: workflow + """ + + # This is map from the data object ID to the activity + # that created it. 
+ data_objs_by_id = get_required_data_objects_map(db, workflows) + + # Build up a set of relevant activities and a map from + # the output objects to the activity that generated them. + wfp_nodes = get_current_workflow_process_nodes(db, workflows, data_objs_by_id, allowlist) + + wfp_nodes_by_data_object_id, wfp_nodes = _associate_workflow_process_nodes_to_data_objects(wfp_nodes, data_objs_by_id) + + # Now populate the parent and children values for the + wfp_nodes = _resolve_relationships(wfp_nodes, wfp_nodes_by_data_object_id) + return wfp_nodes + diff --git a/nmdc_automation/workflow_automation/workflows.py b/nmdc_automation/workflow_automation/workflows.py index 6d01a2cd..acefd44b 100644 --- a/nmdc_automation/workflow_automation/workflows.py +++ b/nmdc_automation/workflow_automation/workflows.py @@ -1,85 +1,29 @@ -from __future__ import annotations +""" This module reads the workflows yaml file and returns a list of WorkflowConfig objects""" from yaml import load - try: from yaml import CLoader as Loader except ImportError: from yaml import Loader -import sys -# TODO: Berkley refactoring: -# Ensure that the Workflow class and load_workflows methods are compatible with the MetaTranscriptomics workflow. -def load_workflows(yaml_file) -> list[Workflow]: +from nmdc_automation.workflow_automation.models import WorkflowConfig + + +def load_workflow_configs(yaml_file) -> list[WorkflowConfig]: """ - Load all workflow definitions from a yaml file, populate - parent-child relationships and return a list of Workflow - objects. 
+ Read the workflows yaml file and return a list of WorkflowConfig objects """ - workflows = [] + workflow_configs = [] data = load(open(yaml_file), Loader) for wf in data["Workflows"]: - workflows.append(Workflow(wf)) + # normalize the keys from Key Name to key_name + wf = {k.replace(" ", "_").lower(): v for k, v in wf.items()} + workflow_configs.append(WorkflowConfig(**wf)) # Populate workflow dependencies - for wf in workflows: - for wf2 in workflows: + for wf in workflow_configs: + for wf2 in workflow_configs: if not wf2.predecessors: continue if wf.name in wf2.predecessors: wf.add_child(wf2) wf2.add_parent(wf) - return workflows - - -class Workflow: - """ - Workflow object class - """ - - _FIELDS = [ - "Name", - "Type", - "Enabled", - "Git_repo", - "Version", - "WDL", - "Analyte Category", - "Collection", - "Predecessors", - "Input_prefix", - "Inputs", - "Activity", - "Filter Input Objects", - "Filter Output Objects", - "Outputs", - "Optional Inputs" - ] - - def __init__(self, wf: dict): - """ - Create a workflow object from a - dictionary - """ - self.children = set() - self.parents = set() - self.do_types = [] - for f in self._FIELDS: - attr_name = f.lower().replace(" ", "_") - setattr(self, attr_name, wf.get(f)) - if not self.inputs: - self.inputs = {} - if not self.optional_inputs: - self.optional_inputs = [] - for _, inp_param in self.inputs.items(): - if inp_param.startswith("do:"): - self.do_types.append(inp_param[3:]) - - def add_child(self, child: Workflow): - self.children.add(child) - - def add_parent(self, parent: Workflow): - self.parents.add(parent) - - -if __name__ == "__main__": - wff = sys.argv[1] - load_workflows(wff) + return workflow_configs diff --git a/poetry.lock b/poetry.lock index 75852c80..f71e5de7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
+# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "annotated-types" @@ -342,22 +342,22 @@ toml = ["tomli"] [[package]] name = "curies" -version = "0.7.10" -description = "Idiomatic conversion between URIs and compact URIs (CURIEs)." +version = "0.8.0" +description = "Idiomatic conversion between URIs and compact URIs (CURIEs)" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "curies-0.7.10-py3-none-any.whl", hash = "sha256:ad80f420dd76b6f3e921a245370ff6ab7473c48c29c17254970c03cd2e58af5f"}, - {file = "curies-0.7.10.tar.gz", hash = "sha256:98a7ceb94710fab3a02727a7f85ba0719dd22be5fc8b5f2ad1d7d4cfc47d64ce"}, + {file = "curies-0.8.0-py3-none-any.whl", hash = "sha256:da2f47270a778cc3ac5e573c1fcaab5c7f7ea91d9b542ada50d5fafb240997cd"}, + {file = "curies-0.8.0.tar.gz", hash = "sha256:be743facd2cc33cf3f827e318f846dcc192e7f7787a2bf215cb4c317e94ac02d"}, ] [package.dependencies] -pydantic = "*" +pydantic = ">=2.0" pytrie = "*" requests = "*" [package.extras] -docs = ["sphinx", "sphinx-automodapi", "sphinx-rtd-theme"] +docs = ["sphinx (>=8)", "sphinx-automodapi", "sphinx-rtd-theme (>=3.0)"] fastapi = ["defusedxml", "fastapi", "httpx", "python-multipart", "uvicorn"] flask = ["defusedxml", "flask"] pandas = ["pandas"] @@ -401,21 +401,21 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] [[package]] name = "dnspython" -version = "2.6.1" +version = "2.7.0" description = "DNS toolkit" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "dnspython-2.6.1-py3-none-any.whl", hash = "sha256:5ef3b9680161f6fa89daf8ad451b5f1a33b18ae8a1c6778cdf4b43f08c0a6e50"}, - {file = "dnspython-2.6.1.tar.gz", hash = "sha256:e8f0f9c23a7b7cb99ded64e6c3a6f3e701d78f50c55e002b839dea7225cff7cc"}, + {file = "dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86"}, + {file = 
"dnspython-2.7.0.tar.gz", hash = "sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1"}, ] [package.extras] -dev = ["black (>=23.1.0)", "coverage (>=7.0)", "flake8 (>=7)", "mypy (>=1.8)", "pylint (>=3)", "pytest (>=7.4)", "pytest-cov (>=4.1.0)", "sphinx (>=7.2.0)", "twine (>=4.0.0)", "wheel (>=0.42.0)"] -dnssec = ["cryptography (>=41)"] +dev = ["black (>=23.1.0)", "coverage (>=7.0)", "flake8 (>=7)", "hypercorn (>=0.16.0)", "mypy (>=1.8)", "pylint (>=3)", "pytest (>=7.4)", "pytest-cov (>=4.1.0)", "quart-trio (>=0.11.0)", "sphinx (>=7.2.0)", "sphinx-rtd-theme (>=2.0.0)", "twine (>=4.0.0)", "wheel (>=0.42.0)"] +dnssec = ["cryptography (>=43)"] doh = ["h2 (>=4.1.0)", "httpcore (>=1.0.0)", "httpx (>=0.26.0)"] -doq = ["aioquic (>=0.9.25)"] -idna = ["idna (>=3.6)"] +doq = ["aioquic (>=1.0.0)"] +idna = ["idna (>=3.7)"] trio = ["trio (>=0.23)"] wmi = ["wmi (>=1.5.1)"] @@ -828,13 +828,13 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- [[package]] name = "jsonschema-specifications" -version = "2023.12.1" +version = "2024.10.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"}, - {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"}, + {file = "jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf"}, + {file = "jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272"}, ] [package.dependencies] @@ -842,13 +842,13 @@ referencing = ">=0.31.0" [[package]] name = "linkml" -version = "1.8.3" +version = "1.8.4" description = "Linked Open Data 
Modeling Language" optional = false python-versions = "<4.0.0,>=3.8.1" files = [ - {file = "linkml-1.8.3-py3-none-any.whl", hash = "sha256:ced1af3055312d15335cfe8846847c0491519c9af28cce5ebd8e4e26e4361754"}, - {file = "linkml-1.8.3.tar.gz", hash = "sha256:6bf65f3d6c4ce9e88af0fda71b954ae4c6f5e885f8b4d74c1090380d565e76ba"}, + {file = "linkml-1.8.4-py3-none-any.whl", hash = "sha256:9fa26d7e522dd98568d0313e363abd67c2b17a671f31595c9e53beaec27b7214"}, + {file = "linkml-1.8.4.tar.gz", hash = "sha256:a1918d226ab5c268e260ddc7cfa6edb53d7faab3fb0144efef5fad572b052c36"}, ] [package.dependencies] @@ -879,8 +879,9 @@ watchdog = ">=0.9.0" [package.extras] black = ["black (>=24.0.0)"] +numpydantic = ["numpydantic (>=1.6.1)"] shacl = ["pyshacl (>=0.25.0,<0.26.0)"] -tests = ["black (>=24.0.0)", "pyshacl (>=0.25.0,<0.26.0)"] +tests = ["black (>=24.0.0)", "numpydantic (>=1.6.1)", "pyshacl (>=0.25.0,<0.26.0)"] [[package]] name = "linkml-dataops" @@ -902,13 +903,13 @@ linkml-runtime = ">=1.1.6" [[package]] name = "linkml-runtime" -version = "1.8.2" +version = "1.8.3" description = "Runtime environment for LinkML, the Linked open data modeling language" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "linkml_runtime-1.8.2-py3-none-any.whl", hash = "sha256:a66d7b5b82cb57b2d6c603c75ca22db4bae0409e0fb2b9e7835f921a23716096"}, - {file = "linkml_runtime-1.8.2.tar.gz", hash = "sha256:f5067aeeb96c8d3ca1761b55b82d927af88d810459d533fb1f7876a90224b130"}, + {file = "linkml_runtime-1.8.3-py3-none-any.whl", hash = "sha256:0750920f1348fffa903d99e7b5834ce425a2a538285aff9068dbd96d05caabd1"}, + {file = "linkml_runtime-1.8.3.tar.gz", hash = "sha256:5b7f682eef54aaf0a59c50eeacdb11463b43b124a044caf496cde59936ac05c8"}, ] [package.dependencies] @@ -946,71 +947,72 @@ testing = ["coverage", "pyyaml"] [[package]] name = "markupsafe" -version = "2.1.5" +version = "3.0.1" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, - {file = 
"MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, - {file = 
"MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, - {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:db842712984e91707437461930e6011e60b39136c7331e971952bb30465bc1a1"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3ffb4a8e7d46ed96ae48805746755fadd0909fea2306f93d5d8233ba23dda12a"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:67c519635a4f64e495c50e3107d9b4075aec33634272b5db1cde839e07367589"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48488d999ed50ba8d38c581d67e496f955821dc183883550a6fbc7f1aefdc170"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f31ae06f1328595d762c9a2bf29dafd8621c7d3adc130cbb46278079758779ca"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:80fcbf3add8790caddfab6764bde258b5d09aefbe9169c183f88a7410f0f6dea"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3341c043c37d78cc5ae6e3e305e988532b072329639007fd408a476642a89fd6"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cb53e2a99df28eee3b5f4fea166020d3ef9116fdc5764bc5117486e6d1211b25"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-win32.whl", hash = "sha256:db15ce28e1e127a0013dfb8ac243a8e392db8c61eae113337536edb28bdc1f97"}, + {file = "MarkupSafe-3.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:4ffaaac913c3f7345579db4f33b0020db693f302ca5137f106060316761beea9"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:26627785a54a947f6d7336ce5963569b5d75614619e75193bdb4e06e21d447ad"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b954093679d5750495725ea6f88409946d69cfb25ea7b4c846eef5044194f583"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:973a371a55ce9ed333a3a0f8e0bcfae9e0d637711534bcb11e130af2ab9334e7"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:244dbe463d5fb6d7ce161301a03a6fe744dac9072328ba9fc82289238582697b"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d98e66a24497637dd31ccab090b34392dddb1f2f811c4b4cd80c230205c074a3"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ad91738f14eb8da0ff82f2acd0098b6257621410dcbd4df20aaa5b4233d75a50"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7044312a928a66a4c2a22644147bc61a199c1709712069a344a3fb5cfcf16915"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a4792d3b3a6dfafefdf8e937f14906a51bd27025a36f4b188728a73382231d91"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-win32.whl", hash = "sha256:fa7d686ed9883f3d664d39d5a8e74d3c5f63e603c2e3ff0abcba23eac6542635"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:9ba25a71ebf05b9bb0e2ae99f8bc08a07ee8e98c612175087112656ca0f5c8bf"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8ae369e84466aa70f3154ee23c1451fda10a8ee1b63923ce76667e3077f2b0c4"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40f1e10d51c92859765522cbd79c5c8989f40f0419614bcdc5015e7b6bf97fc5"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a4cb365cb49b750bdb60b846b0c0bc49ed62e59a76635095a179d440540c346"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee3941769bd2522fe39222206f6dd97ae83c442a94c90f2b7a25d847d40f4729"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62fada2c942702ef8952754abfc1a9f7658a4d5460fabe95ac7ec2cbe0d02abc"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c2d64fdba74ad16138300815cfdc6ab2f4647e23ced81f59e940d7d4a1469d9"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fb532dd9900381d2e8f48172ddc5a59db4c445a11b9fab40b3b786da40d3b56b"}, + {file = 
"MarkupSafe-3.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0f84af7e813784feb4d5e4ff7db633aba6c8ca64a833f61d8e4eade234ef0c38"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-win32.whl", hash = "sha256:cbf445eb5628981a80f54087f9acdbf84f9b7d862756110d172993b9a5ae81aa"}, + {file = "MarkupSafe-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:a10860e00ded1dd0a65b83e717af28845bb7bd16d8ace40fe5531491de76b79f"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e81c52638315ff4ac1b533d427f50bc0afc746deb949210bc85f05d4f15fd772"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:312387403cd40699ab91d50735ea7a507b788091c416dd007eac54434aee51da"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ae99f31f47d849758a687102afdd05bd3d3ff7dbab0a8f1587981b58a76152a"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c97ff7fedf56d86bae92fa0a646ce1a0ec7509a7578e1ed238731ba13aabcd1c"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7420ceda262dbb4b8d839a4ec63d61c261e4e77677ed7c66c99f4e7cb5030dd"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45d42d132cff577c92bfba536aefcfea7e26efb975bd455db4e6602f5c9f45e7"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c8817557d0de9349109acb38b9dd570b03cc5014e8aabf1cbddc6e81005becd"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6a54c43d3ec4cf2a39f4387ad044221c66a376e58c0d0e971d47c475ba79c6b5"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-win32.whl", hash = "sha256:c91b394f7601438ff79a4b93d16be92f216adb57d813a78be4446fe0f6bc2d8c"}, + {file = "MarkupSafe-3.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:fe32482b37b4b00c7a52a07211b479653b7fe4f22b2e481b9a9b099d8a430f2f"}, + {file = 
"MarkupSafe-3.0.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:17b2aea42a7280db02ac644db1d634ad47dcc96faf38ab304fe26ba2680d359a"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:852dc840f6d7c985603e60b5deaae1d89c56cb038b577f6b5b8c808c97580f1d"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0778de17cff1acaeccc3ff30cd99a3fd5c50fc58ad3d6c0e0c4c58092b859396"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:800100d45176652ded796134277ecb13640c1a537cad3b8b53da45aa96330453"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d06b24c686a34c86c8c1fba923181eae6b10565e4d80bdd7bc1c8e2f11247aa4"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:33d1c36b90e570ba7785dacd1faaf091203d9942bc036118fab8110a401eb1a8"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:beeebf760a9c1f4c07ef6a53465e8cfa776ea6a2021eda0d0417ec41043fe984"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bbde71a705f8e9e4c3e9e33db69341d040c827c7afa6789b14c6e16776074f5a"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-win32.whl", hash = "sha256:82b5dba6eb1bcc29cc305a18a3c5365d2af06ee71b123216416f7e20d2a84e5b"}, + {file = "MarkupSafe-3.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:730d86af59e0e43ce277bb83970530dd223bf7f2a838e086b50affa6ec5f9295"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:4935dd7883f1d50e2ffecca0aa33dc1946a94c8f3fdafb8df5c330e48f71b132"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e9393357f19954248b00bed7c56f29a25c930593a77630c719653d51e7669c2a"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:40621d60d0e58aa573b68ac5e2d6b20d44392878e0bfc159012a5787c4e35bc8"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f94190df587738280d544971500b9cafc9b950d32efcb1fba9ac10d84e6aa4e6"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6a387d61fe41cdf7ea95b38e9af11cfb1a63499af2759444b99185c4ab33f5b"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8ad4ad1429cd4f315f32ef263c1342166695fad76c100c5d979c45d5570ed58b"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e24bfe89c6ac4c31792793ad9f861b8f6dc4546ac6dc8f1c9083c7c4f2b335cd"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2a4b34a8d14649315c4bc26bbfa352663eb51d146e35eef231dd739d54a5430a"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-win32.whl", hash = "sha256:242d6860f1fd9191aef5fae22b51c5c19767f93fb9ead4d21924e0bcb17619d8"}, + {file = "MarkupSafe-3.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:93e8248d650e7e9d49e8251f883eed60ecbc0e8ffd6349e18550925e31bd029b"}, + {file = "markupsafe-3.0.1.tar.gz", hash = "sha256:3e683ee4f5d0fa2dde4db77ed8dd8a876686e3fc417655c2ece9a90576905344"}, ] [[package]] @@ -1085,13 +1087,13 @@ pyyaml = ">=5.1" [[package]] name = "mkdocs-material" -version = "9.5.35" +version = "9.5.39" description = "Documentation that simply works" optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_material-9.5.35-py3-none-any.whl", hash = "sha256:44e069d87732d29f4a2533ae0748fa0e67e270043270c71f04d0fba11a357b24"}, - {file = "mkdocs_material-9.5.35.tar.gz", hash = "sha256:0d233d7db067ac896bf22ee7950eebf2b1eaf26c155bb27382bf4174021cc117"}, + {file = "mkdocs_material-9.5.39-py3-none-any.whl", hash = "sha256:0f2f68c8db89523cb4a59705cd01b4acd62b2f71218ccb67e1e004e560410d2b"}, + {file = "mkdocs_material-9.5.39.tar.gz", hash = 
"sha256:25faa06142afa38549d2b781d475a86fb61de93189f532b88e69bf11e5e5c3be"}, ] [package.dependencies] @@ -1184,13 +1186,13 @@ pymongo = ["pymongo"] [[package]] name = "nmdc-schema" -version = "10.8.0" +version = "11.0.1" description = "Schema resources for the National Microbiome Data Collaborative (NMDC)" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "nmdc_schema-10.8.0-py3-none-any.whl", hash = "sha256:5795db3ae5d2a12143f3fc785cf6433b7c6712390b53fe77f256a70d31281f7c"}, - {file = "nmdc_schema-10.8.0.tar.gz", hash = "sha256:5f9db462f63abdc3b722206242c7089c4624229b7308ec295b77b069fc0e755c"}, + {file = "nmdc_schema-11.0.1-py3-none-any.whl", hash = "sha256:ff4e8a150fd192fb8daf7423f7ac1bd6b0dd26c68d8030e0359748fbf4e8fd15"}, + {file = "nmdc_schema-11.0.1.tar.gz", hash = "sha256:e491484308375ae913924112b39f38796e9881abd5701621105b3d455c1ca97e"}, ] [package.dependencies] @@ -1200,6 +1202,7 @@ mkdocs = ">=1.4.2,<2.0.0" mkdocs-mermaid2-plugin = ">=0.6.0,<0.7.0" mkdocs-redirects = ">=1.2.1,<2.0.0" pymongo = ">=4.7.2,<5.0.0" +ruamel-yaml = ">=0.18.6,<0.19.0" [package.extras] docs = ["mkdocs-material (>=9.0.12,<10.0.0)"] @@ -1260,64 +1263,64 @@ files = [ [[package]] name = "numpy" -version = "2.1.1" +version = "2.1.2" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" files = [ - {file = "numpy-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8a0e34993b510fc19b9a2ce7f31cb8e94ecf6e924a40c0c9dd4f62d0aac47d9"}, - {file = "numpy-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7dd86dfaf7c900c0bbdcb8b16e2f6ddf1eb1fe39c6c8cca6e94844ed3152a8fd"}, - {file = "numpy-2.1.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:5889dd24f03ca5a5b1e8a90a33b5a0846d8977565e4ae003a63d22ecddf6782f"}, - {file = "numpy-2.1.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:59ca673ad11d4b84ceb385290ed0ebe60266e356641428c845b39cd9df6713ab"}, - {file = 
"numpy-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13ce49a34c44b6de5241f0b38b07e44c1b2dcacd9e36c30f9c2fcb1bb5135db7"}, - {file = "numpy-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913cc1d311060b1d409e609947fa1b9753701dac96e6581b58afc36b7ee35af6"}, - {file = "numpy-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:caf5d284ddea7462c32b8d4a6b8af030b6c9fd5332afb70e7414d7fdded4bfd0"}, - {file = "numpy-2.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:57eb525e7c2a8fdee02d731f647146ff54ea8c973364f3b850069ffb42799647"}, - {file = "numpy-2.1.1-cp310-cp310-win32.whl", hash = "sha256:9a8e06c7a980869ea67bbf551283bbed2856915f0a792dc32dd0f9dd2fb56728"}, - {file = "numpy-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:d10c39947a2d351d6d466b4ae83dad4c37cd6c3cdd6d5d0fa797da56f710a6ae"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0d07841fd284718feffe7dd17a63a2e6c78679b2d386d3e82f44f0108c905550"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b5613cfeb1adfe791e8e681128f5f49f22f3fcaa942255a6124d58ca59d9528f"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0b8cc2715a84b7c3b161f9ebbd942740aaed913584cae9cdc7f8ad5ad41943d0"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:b49742cdb85f1f81e4dc1b39dcf328244f4d8d1ded95dea725b316bd2cf18c95"}, - {file = "numpy-2.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8d5f8a8e3bc87334f025194c6193e408903d21ebaeb10952264943a985066ca"}, - {file = "numpy-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d51fc141ddbe3f919e91a096ec739f49d686df8af254b2053ba21a910ae518bf"}, - {file = "numpy-2.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:98ce7fb5b8063cfdd86596b9c762bf2b5e35a2cdd7e967494ab78a1fa7f8b86e"}, - {file = "numpy-2.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash 
= "sha256:24c2ad697bd8593887b019817ddd9974a7f429c14a5469d7fad413f28340a6d2"}, - {file = "numpy-2.1.1-cp311-cp311-win32.whl", hash = "sha256:397bc5ce62d3fb73f304bec332171535c187e0643e176a6e9421a6e3eacef06d"}, - {file = "numpy-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:ae8ce252404cdd4de56dcfce8b11eac3c594a9c16c231d081fb705cf23bd4d9e"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7c803b7934a7f59563db459292e6aa078bb38b7ab1446ca38dd138646a38203e"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6435c48250c12f001920f0751fe50c0348f5f240852cfddc5e2f97e007544cbe"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:3269c9eb8745e8d975980b3a7411a98976824e1fdef11f0aacf76147f662b15f"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:fac6e277a41163d27dfab5f4ec1f7a83fac94e170665a4a50191b545721c6521"}, - {file = "numpy-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcd8f556cdc8cfe35e70efb92463082b7f43dd7e547eb071ffc36abc0ca4699b"}, - {file = "numpy-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b9cd92c8f8e7b313b80e93cedc12c0112088541dcedd9197b5dee3738c1201"}, - {file = "numpy-2.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:afd9c680df4de71cd58582b51e88a61feed4abcc7530bcd3d48483f20fc76f2a"}, - {file = "numpy-2.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8661c94e3aad18e1ea17a11f60f843a4933ccaf1a25a7c6a9182af70610b2313"}, - {file = "numpy-2.1.1-cp312-cp312-win32.whl", hash = "sha256:950802d17a33c07cba7fd7c3dcfa7d64705509206be1606f196d179e539111ed"}, - {file = "numpy-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:3fc5eabfc720db95d68e6646e88f8b399bfedd235994016351b1d9e062c4b270"}, - {file = "numpy-2.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:046356b19d7ad1890c751b99acad5e82dc4a02232013bd9a9a712fddf8eb60f5"}, - {file = 
"numpy-2.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6e5a9cb2be39350ae6c8f79410744e80154df658d5bea06e06e0ac5bb75480d5"}, - {file = "numpy-2.1.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:d4c57b68c8ef5e1ebf47238e99bf27657511ec3f071c465f6b1bccbef12d4136"}, - {file = "numpy-2.1.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:8ae0fd135e0b157365ac7cc31fff27f07a5572bdfc38f9c2d43b2aff416cc8b0"}, - {file = "numpy-2.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:981707f6b31b59c0c24bcda52e5605f9701cb46da4b86c2e8023656ad3e833cb"}, - {file = "numpy-2.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ca4b53e1e0b279142113b8c5eb7d7a877e967c306edc34f3b58e9be12fda8df"}, - {file = "numpy-2.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e097507396c0be4e547ff15b13dc3866f45f3680f789c1a1301b07dadd3fbc78"}, - {file = "numpy-2.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7506387e191fe8cdb267f912469a3cccc538ab108471291636a96a54e599556"}, - {file = "numpy-2.1.1-cp313-cp313-win32.whl", hash = "sha256:251105b7c42abe40e3a689881e1793370cc9724ad50d64b30b358bbb3a97553b"}, - {file = "numpy-2.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:f212d4f46b67ff604d11fff7cc62d36b3e8714edf68e44e9760e19be38c03eb0"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:920b0911bb2e4414c50e55bd658baeb78281a47feeb064ab40c2b66ecba85553"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bab7c09454460a487e631ffc0c42057e3d8f2a9ddccd1e60c7bb8ed774992480"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:cea427d1350f3fd0d2818ce7350095c1a2ee33e30961d2f0fef48576ddbbe90f"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:e30356d530528a42eeba51420ae8bf6c6c09559051887196599d96ee5f536468"}, - {file = "numpy-2.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e8dfa9e94fc127c40979c3eacbae1e61fda4fe71d84869cc129e2721973231ef"}, - {file = "numpy-2.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:910b47a6d0635ec1bd53b88f86120a52bf56dcc27b51f18c7b4a2e2224c29f0f"}, - {file = "numpy-2.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:13cc11c00000848702322af4de0147ced365c81d66053a67c2e962a485b3717c"}, - {file = "numpy-2.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:53e27293b3a2b661c03f79aa51c3987492bd4641ef933e366e0f9f6c9bf257ec"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7be6a07520b88214ea85d8ac8b7d6d8a1839b0b5cb87412ac9f49fa934eb15d5"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:52ac2e48f5ad847cd43c4755520a2317f3380213493b9d8a4c5e37f3b87df504"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50a95ca3560a6058d6ea91d4629a83a897ee27c00630aed9d933dff191f170cd"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:99f4a9ee60eed1385a86e82288971a51e71df052ed0b2900ed30bc840c0f2e39"}, - {file = "numpy-2.1.1.tar.gz", hash = "sha256:d0cf7d55b1051387807405b3898efafa862997b4cba8aa5dbe657be794afeafd"}, + {file = "numpy-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:30d53720b726ec36a7f88dc873f0eec8447fbc93d93a8f079dfac2629598d6ee"}, + {file = "numpy-2.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8d3ca0a72dd8846eb6f7dfe8f19088060fcb76931ed592d29128e0219652884"}, + {file = "numpy-2.1.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:fc44e3c68ff00fd991b59092a54350e6e4911152682b4782f68070985aa9e648"}, + {file = "numpy-2.1.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:7c1c60328bd964b53f8b835df69ae8198659e2b9302ff9ebb7de4e5a5994db3d"}, + {file = "numpy-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:6cdb606a7478f9ad91c6283e238544451e3a95f30fb5467fbf715964341a8a86"}, + {file = "numpy-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d666cb72687559689e9906197e3bec7b736764df6a2e58ee265e360663e9baf7"}, + {file = "numpy-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c6eef7a2dbd0abfb0d9eaf78b73017dbfd0b54051102ff4e6a7b2980d5ac1a03"}, + {file = "numpy-2.1.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:12edb90831ff481f7ef5f6bc6431a9d74dc0e5ff401559a71e5e4611d4f2d466"}, + {file = "numpy-2.1.2-cp310-cp310-win32.whl", hash = "sha256:a65acfdb9c6ebb8368490dbafe83c03c7e277b37e6857f0caeadbbc56e12f4fb"}, + {file = "numpy-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:860ec6e63e2c5c2ee5e9121808145c7bf86c96cca9ad396c0bd3e0f2798ccbe2"}, + {file = "numpy-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b42a1a511c81cc78cbc4539675713bbcf9d9c3913386243ceff0e9429ca892fe"}, + {file = "numpy-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:faa88bc527d0f097abdc2c663cddf37c05a1c2f113716601555249805cf573f1"}, + {file = "numpy-2.1.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:c82af4b2ddd2ee72d1fc0c6695048d457e00b3582ccde72d8a1c991b808bb20f"}, + {file = "numpy-2.1.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:13602b3174432a35b16c4cfb5de9a12d229727c3dd47a6ce35111f2ebdf66ff4"}, + {file = "numpy-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ebec5fd716c5a5b3d8dfcc439be82a8407b7b24b230d0ad28a81b61c2f4659a"}, + {file = "numpy-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2b49c3c0804e8ecb05d59af8386ec2f74877f7ca8fd9c1e00be2672e4d399b1"}, + {file = "numpy-2.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2cbba4b30bf31ddbe97f1c7205ef976909a93a66bb1583e983adbd155ba72ac2"}, + {file = "numpy-2.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8e00ea6fc82e8a804433d3e9cedaa1051a1422cb6e443011590c14d2dea59146"}, + 
{file = "numpy-2.1.2-cp311-cp311-win32.whl", hash = "sha256:5006b13a06e0b38d561fab5ccc37581f23c9511879be7693bd33c7cd15ca227c"}, + {file = "numpy-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:f1eb068ead09f4994dec71c24b2844f1e4e4e013b9629f812f292f04bd1510d9"}, + {file = "numpy-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7bf0a4f9f15b32b5ba53147369e94296f5fffb783db5aacc1be15b4bf72f43b"}, + {file = "numpy-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b1d0fcae4f0949f215d4632be684a539859b295e2d0cb14f78ec231915d644db"}, + {file = "numpy-2.1.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f751ed0a2f250541e19dfca9f1eafa31a392c71c832b6bb9e113b10d050cb0f1"}, + {file = "numpy-2.1.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:bd33f82e95ba7ad632bc57837ee99dba3d7e006536200c4e9124089e1bf42426"}, + {file = "numpy-2.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b8cde4f11f0a975d1fd59373b32e2f5a562ade7cde4f85b7137f3de8fbb29a0"}, + {file = "numpy-2.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d95f286b8244b3649b477ac066c6906fbb2905f8ac19b170e2175d3d799f4df"}, + {file = "numpy-2.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ab4754d432e3ac42d33a269c8567413bdb541689b02d93788af4131018cbf366"}, + {file = "numpy-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e585c8ae871fd38ac50598f4763d73ec5497b0de9a0ab4ef5b69f01c6a046142"}, + {file = "numpy-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9c6c754df29ce6a89ed23afb25550d1c2d5fdb9901d9c67a16e0b16eaf7e2550"}, + {file = "numpy-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:456e3b11cb79ac9946c822a56346ec80275eaf2950314b249b512896c0d2505e"}, + {file = "numpy-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a84498e0d0a1174f2b3ed769b67b656aa5460c92c9554039e11f20a05650f00d"}, + {file = "numpy-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:4d6ec0d4222e8ffdab1744da2560f07856421b367928026fb540e1945f2eeeaf"}, + {file = "numpy-2.1.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:259ec80d54999cc34cd1eb8ded513cb053c3bf4829152a2e00de2371bd406f5e"}, + {file = "numpy-2.1.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:675c741d4739af2dc20cd6c6a5c4b7355c728167845e3c6b0e824e4e5d36a6c3"}, + {file = "numpy-2.1.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b2d4e667895cc55e3ff2b56077e4c8a5604361fc21a042845ea3ad67465aa8"}, + {file = "numpy-2.1.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43cca367bf94a14aca50b89e9bc2061683116cfe864e56740e083392f533ce7a"}, + {file = "numpy-2.1.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:76322dcdb16fccf2ac56f99048af32259dcc488d9b7e25b51e5eca5147a3fb98"}, + {file = "numpy-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:32e16a03138cabe0cb28e1007ee82264296ac0983714094380b408097a418cfe"}, + {file = "numpy-2.1.2-cp313-cp313-win32.whl", hash = "sha256:242b39d00e4944431a3cd2db2f5377e15b5785920421993770cddb89992c3f3a"}, + {file = "numpy-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:f2ded8d9b6f68cc26f8425eda5d3877b47343e68ca23d0d0846f4d312ecaa445"}, + {file = "numpy-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2ffef621c14ebb0188a8633348504a35c13680d6da93ab5cb86f4e54b7e922b5"}, + {file = "numpy-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad369ed238b1959dfbade9018a740fb9392c5ac4f9b5173f420bd4f37ba1f7a0"}, + {file = "numpy-2.1.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d82075752f40c0ddf57e6e02673a17f6cb0f8eb3f587f63ca1eaab5594da5b17"}, + {file = "numpy-2.1.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1600068c262af1ca9580a527d43dc9d959b0b1d8e56f8a05d830eea39b7c8af6"}, + {file = "numpy-2.1.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a26ae94658d3ba3781d5e103ac07a876b3e9b29db53f68ed7df432fd033358a8"}, + 
{file = "numpy-2.1.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13311c2db4c5f7609b462bc0f43d3c465424d25c626d95040f073e30f7570e35"}, + {file = "numpy-2.1.2-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:2abbf905a0b568706391ec6fa15161fad0fb5d8b68d73c461b3c1bab6064dd62"}, + {file = "numpy-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ef444c57d664d35cac4e18c298c47d7b504c66b17c2ea91312e979fcfbdfb08a"}, + {file = "numpy-2.1.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bdd407c40483463898b84490770199d5714dcc9dd9b792f6c6caccc523c00952"}, + {file = "numpy-2.1.2-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:da65fb46d4cbb75cb417cddf6ba5e7582eb7bb0b47db4b99c9fe5787ce5d91f5"}, + {file = "numpy-2.1.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c193d0b0238638e6fc5f10f1b074a6993cb13b0b431f64079a509d63d3aa8b7"}, + {file = "numpy-2.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a7d80b2e904faa63068ead63107189164ca443b42dd1930299e0d1cb041cec2e"}, + {file = "numpy-2.1.2.tar.gz", hash = "sha256:13532a088217fa624c99b843eeb54640de23b3414b14aa66d023805eb731066c"}, ] [[package]] @@ -1427,9 +1430,9 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -1582,8 +1585,8 @@ files = [ annotated-types = ">=0.6.0" pydantic-core = "2.23.4" typing-extensions = [ - {version = ">=4.6.1", markers = "python_version < \"3.13\""}, {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, + {version = ">=4.6.1", markers = "python_version < \"3.13\""}, ] [package.extras] @@ -1733,13 +1736,13 @@ jsonasobj = ">=1.2.1" [[package]] name = "pymdown-extensions" -version = "10.9" 
+version = "10.11.2" description = "Extension pack for Python Markdown." optional = false python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.9-py3-none-any.whl", hash = "sha256:d323f7e90d83c86113ee78f3fe62fc9dee5f56b54d912660703ea1816fed5626"}, - {file = "pymdown_extensions-10.9.tar.gz", hash = "sha256:6ff740bcd99ec4172a938970d42b96128bdc9d4b9bcad72494f29921dc69b753"}, + {file = "pymdown_extensions-10.11.2-py3-none-any.whl", hash = "sha256:41cdde0a77290e480cf53892f5c5e50921a7ee3e5cd60ba91bf19837b33badcf"}, + {file = "pymdown_extensions-10.11.2.tar.gz", hash = "sha256:bc8847ecc9e784a098efd35e20cba772bc5a1b529dfcef9dc1972db9021a1049"}, ] [package.dependencies] @@ -1751,70 +1754,70 @@ extra = ["pygments (>=2.12)"] [[package]] name = "pymongo" -version = "4.9.1" +version = "4.10.1" description = "Python driver for MongoDB " optional = false python-versions = ">=3.8" files = [ - {file = "pymongo-4.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dc3d070d746ab79e9b393a5c236df20e56607389af2b79bf1bfe9a841117558e"}, - {file = "pymongo-4.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fe709d05654c12fc513617c8d5c8d05b7e9cf1d5d94ada68add4e89530c867d2"}, - {file = "pymongo-4.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa4493f304b33c5d2ecee3055c98889ac6724d56f5f922d47420a45d0d4099c9"}, - {file = "pymongo-4.9.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8e8b8deba6a4bff3dd5421071083219521c74d2acae0322de5c06f1a66c56af"}, - {file = "pymongo-4.9.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e3645aff8419ca60f9ccd08966b2f6b0d78053f9f98a814d025426f1d874c19a"}, - {file = "pymongo-4.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51dbc6251c6783dfcc7d657c346986d8bad7210989b2fe15de16db5204a8e7ae"}, - {file = "pymongo-4.9.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1d7aa9cc2d92e73bdb036c578ba019da94ea165eb147e691cd910a6fab7ce3b7"}, - {file = "pymongo-4.9.1-cp310-cp310-win32.whl", hash = "sha256:8b632e01617f2608880f7b9926f54a5f5ebb51631996e0540fff7fc7980663c9"}, - {file = "pymongo-4.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:f05e34d401be871d7c87cb10727d49315444e4ded07ff876a595e4c23b7436da"}, - {file = "pymongo-4.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6bb3d5282278594753089dc7da48bfae4a7f337a2dd4d397eabb591c649e58d0"}, - {file = "pymongo-4.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f0d5258bc85a4e6b5bcae8160628168e71ec4625a58ceb53327c3280a0b6914"}, - {file = "pymongo-4.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96462fb2175f740701d229f52018ea6e4adc4148c4112e6628bb359dd534a3df"}, - {file = "pymongo-4.9.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:286fb275267f0293364ba579f6354452599161f1902ad411061c7f744ab88328"}, - {file = "pymongo-4.9.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4cddb51cead9700c4dccc916952bc0321b8d766bf782d374bfa0e93ef47c1d20"}, - {file = "pymongo-4.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d79f20f9c7cbc1c708fb80b648b6fbd3220fd3437a9bd6017c1eb592e03b361"}, - {file = "pymongo-4.9.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd3352eaf578f8e9bdea7a5692910eedad1e8680f60726fc70e99c8af51a5449"}, - {file = "pymongo-4.9.1-cp311-cp311-win32.whl", hash = "sha256:ea3f0196e7c311b9944a609ac175bd91ab97952164a1246716fdd38d53ca3bcc"}, - {file = "pymongo-4.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:b4c793db8457c856f333f396798470b9bfe405e17c307d581532c74cec70150c"}, - {file = "pymongo-4.9.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:47b4896544095d172c366dd4d4ea1da6b0ab1a77d8416897cc1801e2421b1e67"}, - {file = "pymongo-4.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:fbb1c7dfcf6c44e9e1928290631c7603817991cdf570691c9e15fca594918435"}, - {file = "pymongo-4.9.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7689da1d1b444284e4ea9ab2eb64a15307b6b795918c0f3cd7774dd1d8a7556"}, - {file = "pymongo-4.9.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f962d74201c772555f7a78792fed820a5ea76db5c7ee6cf43748e411b44e430"}, - {file = "pymongo-4.9.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08fbab69f3fb6f8088c81f4c4a8abd84a99c132034f5e27e47f894bbcb6bf439"}, - {file = "pymongo-4.9.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4327c0d9bd616b8289691360f2d4a09a72fe35479795832eae0d4ff78af53923"}, - {file = "pymongo-4.9.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34e4993ae78be56f9e27a141168a1ab78253576fa3e893fa335a719ce204c3ef"}, - {file = "pymongo-4.9.1-cp312-cp312-win32.whl", hash = "sha256:e1f346811d4a2369f88ab7a6f886fa9c3bbc9ed4e4f4a3becca8717a73d465cb"}, - {file = "pymongo-4.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:a2b12c74cfd90147babb77f9728646bcedfdbd2bd2a5b4130a00e3a0af1a3d34"}, - {file = "pymongo-4.9.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a40ea8bc9cffb61c5c9c426c430d22235e085e610ee81ae075ddf51f12f76236"}, - {file = "pymongo-4.9.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:75d5974f874acdb2f125bdbe785045b23a39ecce1d3143dd5712800c7b6d25eb"}, - {file = "pymongo-4.9.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f23a046531030318622414f21198e232cf93c5640da9a80b45596a059c8cc090"}, - {file = "pymongo-4.9.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91b1a92214c3912af5467f77c2f6435cd76f6de64c70cba7bb4ee43eba7f459e"}, - {file = "pymongo-4.9.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:3a846423c4535428f69a90a1451df3718bc59f0c4ab685b9e96d3071951e0be4"}, - {file = "pymongo-4.9.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d476d91a5c9e6c37bc8ec3fb294e1c01d95736ccf01a59bb1540fe2f710f826e"}, - {file = "pymongo-4.9.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:172d8ba0f567e351a18765db23dab7dbcfdffd91a8788d90d46b350f80a40781"}, - {file = "pymongo-4.9.1-cp313-cp313-win32.whl", hash = "sha256:95418e334629440f70fe5ceeefc6cbbd50defb566901c8d68179ffbaec8d5f01"}, - {file = "pymongo-4.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:1dfd2aa30174d36a3ef1dae4ee4c89710c2d65cac52ce6e13f17c710edbd61cf"}, - {file = "pymongo-4.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c4204fad54830a3173a5c939cd052d0561fba03dba7e0ff6852fd631f3314aa4"}, - {file = "pymongo-4.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:375765ec81b1f0a26d08928afea0c3dff897c36080a090be53fc7b70cc51d497"}, - {file = "pymongo-4.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d1b959a3dda0775d9111622ee47ad47772aed3a9da2e7d5f2f513fa68175dea"}, - {file = "pymongo-4.9.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42c19d2b094cdd0ead7dbb38860bbe8268c140334ce55d8b39204ddb4ebd4904"}, - {file = "pymongo-4.9.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1fac1def9e9073f1c80198c99f0ec39c2528236c8912d96d7fd3b0237f4c523a"}, - {file = "pymongo-4.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b347052d510989d1f52b8553b31297f21cf74bd9f6aed71ee84e563492f4ff17"}, - {file = "pymongo-4.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b4b961fce213f2bcdc92268f85111a3668c61b9b4d4e7ece27dce3a137cfcbd"}, - {file = "pymongo-4.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:a0b10cf51ec14a487c94709d294c00e1fb6a0a4c38cdc3acfb2ced5ef60972a0"}, - {file = "pymongo-4.9.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:679b8d55854da7c7fdb82aa5e092ab4de0144daf6758defed8ab00ff9ce05360"}, - {file = "pymongo-4.9.1-cp38-cp38-win32.whl", hash = "sha256:432ad395d2233056b042ccc73234e7136aa65d944d6bd8b5138394bd38aaff79"}, - {file = "pymongo-4.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:9fbe9fad27619ac4cfda5df0ade26a99906da7dfe7b01deddc25997eb1804e4c"}, - {file = "pymongo-4.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:99b611ff75b5d9e17183dcf9584a7b04f9db07e51a162f23ea05e485e0735c0a"}, - {file = "pymongo-4.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8089003a99127f917bdbeec177d41cef019cda8ec70534c1018cb60aacd23c2a"}, - {file = "pymongo-4.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d78adf25967c06298c7e488f4cfab79a390fc32c2b1d428613976f99031603d"}, - {file = "pymongo-4.9.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:56877cfcdf7dfc5c6408e4551ec0d6d65ebbca4d744a0bc90400f09ef6bbcc8a"}, - {file = "pymongo-4.9.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d2efe559d0d96bc0b74b3ff76701ad6f6e1a65f6581b573dcacc29158131c8"}, - {file = "pymongo-4.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f838f613e74b4dad8ace0d90f42346005bece4eda5bf6d389cfadb8322d39316"}, - {file = "pymongo-4.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db5b299e11284f8d82ce2983d8e19fcc28f98f902a179709ef1982b4cca6f8b8"}, - {file = "pymongo-4.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b23211c031b45d0f32de83ab7d77f9c26f1025c2d2c91463a5d8594a16103655"}, - {file = "pymongo-4.9.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:687cf70e096381bc65b4273a6a9319617618f7ace65caffc356e1099c4a68511"}, - {file = 
"pymongo-4.9.1-cp39-cp39-win32.whl", hash = "sha256:e02b03e3815b80a63e773e4c32aed3cf5633d406f376477be74550295c211256"}, - {file = "pymongo-4.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:0492ef43f3342354cf581712e431621c221f60c877ebded84e3f3e53b71bbbe0"}, - {file = "pymongo-4.9.1.tar.gz", hash = "sha256:b7f2d34390acf60e229c30037d1473fcf69f4536cd7f48f6f78c0c931c61c505"}, + {file = "pymongo-4.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e699aa68c4a7dea2ab5a27067f7d3e08555f8d2c0dc6a0c8c60cfd9ff2e6a4b1"}, + {file = "pymongo-4.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:70645abc714f06b4ad6b72d5bf73792eaad14e3a2cfe29c62a9c81ada69d9e4b"}, + {file = "pymongo-4.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae2fd94c9fe048c94838badcc6e992d033cb9473eb31e5710b3707cba5e8aee2"}, + {file = "pymongo-4.10.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ded27a4a5374dae03a92e084a60cdbcecd595306555bda553b833baf3fc4868"}, + {file = "pymongo-4.10.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ecc2455e3974a6c429687b395a0bc59636f2d6aedf5785098cf4e1f180f1c71"}, + {file = "pymongo-4.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920fee41f7d0259f5f72c1f1eb331bc26ffbdc952846f9bd8c3b119013bb52c"}, + {file = "pymongo-4.10.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0a15665b2d6cf364f4cd114d62452ce01d71abfbd9c564ba8c74dcd7bbd6822"}, + {file = "pymongo-4.10.1-cp310-cp310-win32.whl", hash = "sha256:29e1c323c28a4584b7095378ff046815e39ff82cdb8dc4cc6dfe3acf6f9ad1f8"}, + {file = "pymongo-4.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:88dc4aa45f8744ccfb45164aedb9a4179c93567bbd98a33109d7dc400b00eb08"}, + {file = "pymongo-4.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:57ee6becae534e6d47848c97f6a6dff69e3cce7c70648d6049bd586764febe59"}, + {file = 
"pymongo-4.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6f437a612f4d4f7aca1812311b1e84477145e950fdafe3285b687ab8c52541f3"}, + {file = "pymongo-4.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a970fd3117ab40a4001c3dad333bbf3c43687d90f35287a6237149b5ccae61d"}, + {file = "pymongo-4.10.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7c4d0e7cd08ef9f8fbf2d15ba281ed55604368a32752e476250724c3ce36c72e"}, + {file = "pymongo-4.10.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca6f700cff6833de4872a4e738f43123db34400173558b558ae079b5535857a4"}, + {file = "pymongo-4.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cec237c305fcbeef75c0bcbe9d223d1e22a6e3ba1b53b2f0b79d3d29c742b45b"}, + {file = "pymongo-4.10.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3337804ea0394a06e916add4e5fac1c89902f1b6f33936074a12505cab4ff05"}, + {file = "pymongo-4.10.1-cp311-cp311-win32.whl", hash = "sha256:778ac646ce6ac1e469664062dfe9ae1f5c9961f7790682809f5ec3b8fda29d65"}, + {file = "pymongo-4.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:9df4ab5594fdd208dcba81be815fa8a8a5d8dedaf3b346cbf8b61c7296246a7a"}, + {file = "pymongo-4.10.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fbedc4617faa0edf423621bb0b3b8707836687161210d470e69a4184be9ca011"}, + {file = "pymongo-4.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7bd26b2aec8ceeb95a5d948d5cc0f62b0eb6d66f3f4230705c1e3d3d2c04ec76"}, + {file = "pymongo-4.10.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb104c3c2a78d9d85571c8ac90ec4f95bca9b297c6eee5ada71fabf1129e1674"}, + {file = "pymongo-4.10.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4924355245a9c79f77b5cda2db36e0f75ece5faf9f84d16014c0a297f6d66786"}, + {file = 
"pymongo-4.10.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:11280809e5dacaef4971113f0b4ff4696ee94cfdb720019ff4fa4f9635138252"}, + {file = "pymongo-4.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5d55f2a82e5eb23795f724991cac2bffbb1c0f219c0ba3bf73a835f97f1bb2e"}, + {file = "pymongo-4.10.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e974ab16a60be71a8dfad4e5afccf8dd05d41c758060f5d5bda9a758605d9a5d"}, + {file = "pymongo-4.10.1-cp312-cp312-win32.whl", hash = "sha256:544890085d9641f271d4f7a47684450ed4a7344d6b72d5968bfae32203b1bb7c"}, + {file = "pymongo-4.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:dcc07b1277e8b4bf4d7382ca133850e323b7ab048b8353af496d050671c7ac52"}, + {file = "pymongo-4.10.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:90bc6912948dfc8c363f4ead54d54a02a15a7fee6cfafb36dc450fc8962d2cb7"}, + {file = "pymongo-4.10.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:594dd721b81f301f33e843453638e02d92f63c198358e5a0fa8b8d0b1218dabc"}, + {file = "pymongo-4.10.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0783e0c8e95397c84e9cf8ab092ab1e5dd7c769aec0ef3a5838ae7173b98dea0"}, + {file = "pymongo-4.10.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6fb6a72e88df46d1c1040fd32cd2d2c5e58722e5d3e31060a0393f04ad3283de"}, + {file = "pymongo-4.10.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e3a593333e20c87415420a4fb76c00b7aae49b6361d2e2205b6fece0563bf40"}, + {file = "pymongo-4.10.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72e2ace7456167c71cfeca7dcb47bd5dceda7db2231265b80fc625c5e8073186"}, + {file = "pymongo-4.10.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ad05eb9c97e4f589ed9e74a00fcaac0d443ccd14f38d1258eb4c39a35dd722b"}, + {file = 
"pymongo-4.10.1-cp313-cp313-win32.whl", hash = "sha256:ee4c86d8e6872a61f7888fc96577b0ea165eb3bdb0d841962b444fa36001e2bb"}, + {file = "pymongo-4.10.1-cp313-cp313-win_amd64.whl", hash = "sha256:45ee87a4e12337353242bc758accc7fb47a2f2d9ecc0382a61e64c8f01e86708"}, + {file = "pymongo-4.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:442ca247f53ad24870a01e80a71cd81b3f2318655fd9d66748ee2bd1b1569d9e"}, + {file = "pymongo-4.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:23e1d62df5592518204943b507be7b457fb8a4ad95a349440406fd42db5d0923"}, + {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6131bc6568b26e7495a9f3ef2b1700566b76bbecd919f4472bfe90038a61f425"}, + {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fdeba88c540c9ed0338c0b2062d9f81af42b18d6646b3e6dda05cf6edd46ada9"}, + {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15a624d752dd3c89d10deb0ef6431559b6d074703cab90a70bb849ece02adc6b"}, + {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba164e73fdade9b4614a2497321c5b7512ddf749ed508950bdecc28d8d76a2d9"}, + {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9235fa319993405ae5505bf1333366388add2e06848db7b3deee8f990b69808e"}, + {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e4a65567bd17d19f03157c7ec992c6530eafd8191a4e5ede25566792c4fe3fa2"}, + {file = "pymongo-4.10.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:f1945d48fb9b8a87d515da07f37e5b2c35b364a435f534c122e92747881f4a7c"}, + {file = "pymongo-4.10.1-cp38-cp38-win32.whl", hash = "sha256:345f8d340802ebce509f49d5833cc913da40c82f2e0daf9f60149cacc9ca680f"}, + {file = "pymongo-4.10.1-cp38-cp38-win_amd64.whl", hash = 
"sha256:3a70d5efdc0387ac8cd50f9a5f379648ecfc322d14ec9e1ba8ec957e5d08c372"}, + {file = "pymongo-4.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15b1492cc5c7cd260229590be7218261e81684b8da6d6de2660cf743445500ce"}, + {file = "pymongo-4.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:95207503c41b97e7ecc7e596d84a61f441b4935f11aa8332828a754e7ada8c82"}, + {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb99f003c720c6d83be02c8f1a7787c22384a8ca9a4181e406174db47a048619"}, + {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f2bc1ee4b1ca2c4e7e6b7a5e892126335ec8d9215bcd3ac2fe075870fefc3358"}, + {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:93a0833c10a967effcd823b4e7445ec491f0bf6da5de0ca33629c0528f42b748"}, + {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f56707497323150bd2ed5d63067f4ffce940d0549d4ea2dfae180deec7f9363"}, + {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:409ab7d6c4223e5c85881697f365239dd3ed1b58f28e4124b846d9d488c86880"}, + {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dac78a650dc0637d610905fd06b5fa6419ae9028cf4d04d6a2657bc18a66bbce"}, + {file = "pymongo-4.10.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1ec3fa88b541e0481aff3c35194c9fac96e4d57ec5d1c122376000eb28c01431"}, + {file = "pymongo-4.10.1-cp39-cp39-win32.whl", hash = "sha256:e0e961923a7b8a1c801c43552dcb8153e45afa41749d9efbd3a6d33f45489f7a"}, + {file = "pymongo-4.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:dabe8bf1ad644e6b93f3acf90ff18536d94538ca4d27e583c6db49889e98e48f"}, + {file = "pymongo-4.10.1.tar.gz", hash = "sha256:a9de02be53b6bb98efe0b9eda84ffa1ec027fcb23a2de62c4f941d9a2f2f3330"}, ] [package.dependencies] @@ -2534,24 +2537,24 @@ 
python-versions = ">=3.6" files = [ {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b42169467c42b692c19cf539c38d4602069d8c1505e97b86387fcf7afb766e1d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:07238db9cbdf8fc1e9de2489a4f68474e70dffcb32232db7c08fa61ca0c7c462"}, - {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d92f81886165cb14d7b067ef37e142256f1c6a90a65cd156b063a43da1708cfd"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fff3573c2db359f091e1589c3d7c5fc2f86f5bdb6f24252c2d8e539d4e45f412"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:aa2267c6a303eb483de8d02db2871afb5c5fc15618d894300b88958f729ad74f"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:840f0c7f194986a63d2c2465ca63af8ccbbc90ab1c6001b1978f05119b5e7334"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:024cfe1fc7c7f4e1aff4a81e718109e13409767e4f871443cbff3dba3578203d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win32.whl", hash = "sha256:c69212f63169ec1cfc9bb44723bf2917cbbd8f6191a00ef3410f5a7fe300722d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win_amd64.whl", hash = "sha256:cabddb8d8ead485e255fe80429f833172b4cadf99274db39abc080e068cbcc31"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:bef08cd86169d9eafb3ccb0a39edb11d8e25f3dae2b28f5c52fd997521133069"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:b16420e621d26fdfa949a8b4b47ade8810c56002f5389970db4ddda51dbff248"}, - {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b5edda50e5e9e15e54a6a8a0070302b00c518a9d32accc2346ad6c984aacd279"}, {file = 
"ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:25c515e350e5b739842fc3228d662413ef28f295791af5e5110b543cf0b57d9b"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_24_aarch64.whl", hash = "sha256:1707814f0d9791df063f8c19bb51b0d1278b8e9a2353abbb676c2f685dee6afe"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:46d378daaac94f454b3a0e3d8d78cafd78a026b1d71443f4966c696b48a6d899"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:09b055c05697b38ecacb7ac50bdab2240bfca1a0c4872b0fd309bb07dc9aa3a9"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win32.whl", hash = "sha256:53a300ed9cea38cf5a2a9b069058137c2ca1ce658a874b79baceb8f892f915a7"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win_amd64.whl", hash = "sha256:c2a72e9109ea74e511e29032f3b670835f8a59bbdc9ce692c5b4ed91ccf1eedb"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ebc06178e8821efc9692ea7544aa5644217358490145629914d8020042c24aa1"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:edaef1c1200c4b4cb914583150dcaa3bc30e592e907c01117c08b13a07255ec2"}, - {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:7048c338b6c86627afb27faecf418768acb6331fc24cfa56c93e8c9780f815fa"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d176b57452ab5b7028ac47e7b3cf644bcfdc8cacfecf7e71759f7f51a59e5c92"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_24_aarch64.whl", hash = "sha256:1dc67314e7e1086c9fdf2680b7b6c2be1c0d8e3a8279f2e993ca2a7545fecf62"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3213ece08ea033eb159ac52ae052a4899b56ecc124bb80020d9bbceeb50258e9"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:aab7fd643f71d7946f2ee58cc88c9b7bfc97debd71dcc93e03e2d174628e7e2d"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-win32.whl", hash = "sha256:5c365d91c88390c8d0a8545df0b5857172824b1c604e867161e6b3d59a827eaa"}, @@ -2559,7 +2562,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a5aa27bad2bb83670b71683aae140a1f52b0857a2deff56ad3f6c13a017a26ed"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c58ecd827313af6864893e7af0a3bb85fd529f862b6adbefe14643947cfe2942"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_12_0_arm64.whl", hash = "sha256:f481f16baec5290e45aebdc2a5168ebc6d35189ae6fea7a58787613a25f6e875"}, - {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:3fcc54cb0c8b811ff66082de1680b4b14cf8a81dce0d4fbf665c2265a81e07a1"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_24_aarch64.whl", hash = "sha256:77159f5d5b5c14f7c34073862a6b7d34944075d9f93e681638f6d753606c6ce6"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7f67a1ee819dc4562d444bbafb135832b0b909f81cc90f7aa00260968c9ca1b3"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4ecbf9c3e19f9562c7fdd462e8d18dd902a47ca046a2e64dba80699f0b6c09b7"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:87ea5ff66d8064301a154b3933ae406b0863402a799b16e4a1d24d9fbbcbe0d3"}, @@ -2567,7 +2570,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-win_amd64.whl", hash = "sha256:3f215c5daf6a9d7bbed4a0a4f760f3113b10e82ff4c5c44bec20a68c8014f675"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1b617618914cb00bf5c34d4357c37aa15183fa229b24767259657746c9077615"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a6a9ffd280b71ad062eae53ac1659ad86a17f59a0fdc7699fd9be40525153337"}, - {file = 
"ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:665f58bfd29b167039f714c6998178d27ccd83984084c286110ef26b230f259f"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:305889baa4043a09e5b76f8e2a51d4ffba44259f6b4c72dec8ca56207d9c6fe1"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:700e4ebb569e59e16a976857c8798aee258dceac7c7d6b50cab63e080058df91"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e2b4c44b60eadec492926a7270abb100ef9f72798e18743939bdbf037aab8c28"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e79e5db08739731b0ce4850bed599235d601701d5694c36570a99a0c5ca41a9d"}, @@ -2575,7 +2578,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-win_amd64.whl", hash = "sha256:56f4252222c067b4ce51ae12cbac231bce32aee1d33fbfc9d17e5b8d6966c312"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:03d1162b6d1df1caa3a4bd27aa51ce17c9afc2046c31b0ad60a0a96ec22f8001"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba64af9fa9cebe325a62fa398760f5c7206b215201b0ec825005f1b18b9bccf"}, - {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:9eb5dee2772b0f704ca2e45b1713e4e5198c18f515b52743576d196348f374d3"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:a1a45e0bb052edf6a1d3a93baef85319733a888363938e1fc9924cb00c8df24c"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:da09ad1c359a728e112d60116f626cc9f29730ff3e0e7db72b9a2dbc2e4beed5"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:184565012b60405d93838167f425713180b949e9d8dd0bbc7b49f074407c5a8b"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:a75879bacf2c987c003368cf14bed0ffe99e8e85acfa6c0bfffc21a090f16880"}, @@ -2793,24 +2796,24 @@ sqlcipher = ["sqlcipher3_binary"] [[package]] name = "tomli" -version = "2.0.1" +version = "2.0.2" description = "A lil' TOML parser" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, + {file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, + {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, ] [[package]] name = "types-python-dateutil" -version = "2.9.0.20240906" +version = "2.9.0.20241003" description = "Typing stubs for python-dateutil" optional = false python-versions = ">=3.8" files = [ - {file = "types-python-dateutil-2.9.0.20240906.tar.gz", hash = "sha256:9706c3b68284c25adffc47319ecc7947e5bb86b3773f843c73906fd598bc176e"}, - {file = "types_python_dateutil-2.9.0.20240906-py3-none-any.whl", hash = "sha256:27c8cc2d058ccb14946eebcaaa503088f4f6dbc4fb6093d3d456a49aef2753f6"}, + {file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"}, + {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"}, ] [[package]] @@ -2826,13 +2829,13 @@ files = [ [[package]] name = "tzdata" -version = "2024.1" +version = "2024.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" files = [ - {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, - {file = "tzdata-2024.1.tar.gz", hash = 
"sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, + {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, + {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, ] [[package]] @@ -2868,41 +2871,41 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "watchdog" -version = "5.0.2" +version = "5.0.3" description = "Filesystem events monitoring" optional = false python-versions = ">=3.9" files = [ - {file = "watchdog-5.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d961f4123bb3c447d9fcdcb67e1530c366f10ab3a0c7d1c0c9943050936d4877"}, - {file = "watchdog-5.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72990192cb63872c47d5e5fefe230a401b87fd59d257ee577d61c9e5564c62e5"}, - {file = "watchdog-5.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6bec703ad90b35a848e05e1b40bf0050da7ca28ead7ac4be724ae5ac2653a1a0"}, - {file = "watchdog-5.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:dae7a1879918f6544201d33666909b040a46421054a50e0f773e0d870ed7438d"}, - {file = "watchdog-5.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c4a440f725f3b99133de610bfec93d570b13826f89616377715b9cd60424db6e"}, - {file = "watchdog-5.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8b2918c19e0d48f5f20df458c84692e2a054f02d9df25e6c3c930063eca64c1"}, - {file = "watchdog-5.0.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:aa9cd6e24126d4afb3752a3e70fce39f92d0e1a58a236ddf6ee823ff7dba28ee"}, - {file = "watchdog-5.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f627c5bf5759fdd90195b0c0431f99cff4867d212a67b384442c51136a098ed7"}, - {file = "watchdog-5.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d7594a6d32cda2b49df3fd9abf9b37c8d2f3eab5df45c24056b4a671ac661619"}, - {file = "watchdog-5.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:ba32efcccfe2c58f4d01115440d1672b4eb26cdd6fc5b5818f1fb41f7c3e1889"}, - {file = "watchdog-5.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:963f7c4c91e3f51c998eeff1b3fb24a52a8a34da4f956e470f4b068bb47b78ee"}, - {file = "watchdog-5.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8c47150aa12f775e22efff1eee9f0f6beee542a7aa1a985c271b1997d340184f"}, - {file = "watchdog-5.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:14dd4ed023d79d1f670aa659f449bcd2733c33a35c8ffd88689d9d243885198b"}, - {file = "watchdog-5.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b84bff0391ad4abe25c2740c7aec0e3de316fdf7764007f41e248422a7760a7f"}, - {file = "watchdog-5.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e8d5ff39f0a9968952cce548e8e08f849141a4fcc1290b1c17c032ba697b9d7"}, - {file = "watchdog-5.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fb223456db6e5f7bd9bbd5cd969f05aae82ae21acc00643b60d81c770abd402b"}, - {file = "watchdog-5.0.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9814adb768c23727a27792c77812cf4e2fd9853cd280eafa2bcfa62a99e8bd6e"}, - {file = "watchdog-5.0.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:901ee48c23f70193d1a7bc2d9ee297df66081dd5f46f0ca011be4f70dec80dab"}, - {file = "watchdog-5.0.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:638bcca3d5b1885c6ec47be67bf712b00a9ab3d4b22ec0881f4889ad870bc7e8"}, - {file = "watchdog-5.0.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5597c051587f8757798216f2485e85eac583c3b343e9aa09127a3a6f82c65ee8"}, - {file = "watchdog-5.0.2-py3-none-manylinux2014_armv7l.whl", hash = "sha256:53ed1bf71fcb8475dd0ef4912ab139c294c87b903724b6f4a8bd98e026862e6d"}, - {file = "watchdog-5.0.2-py3-none-manylinux2014_i686.whl", hash = "sha256:29e4a2607bd407d9552c502d38b45a05ec26a8e40cc7e94db9bb48f861fa5abc"}, - {file = "watchdog-5.0.2-py3-none-manylinux2014_ppc64.whl", hash = "sha256:b6dc8f1d770a8280997e4beae7b9a75a33b268c59e033e72c8a10990097e5fde"}, - {file = 
"watchdog-5.0.2-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:d2ab34adc9bf1489452965cdb16a924e97d4452fcf88a50b21859068b50b5c3b"}, - {file = "watchdog-5.0.2-py3-none-manylinux2014_s390x.whl", hash = "sha256:7d1aa7e4bb0f0c65a1a91ba37c10e19dabf7eaaa282c5787e51371f090748f4b"}, - {file = "watchdog-5.0.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:726eef8f8c634ac6584f86c9c53353a010d9f311f6c15a034f3800a7a891d941"}, - {file = "watchdog-5.0.2-py3-none-win32.whl", hash = "sha256:bda40c57115684d0216556671875e008279dea2dc00fcd3dde126ac8e0d7a2fb"}, - {file = "watchdog-5.0.2-py3-none-win_amd64.whl", hash = "sha256:d010be060c996db725fbce7e3ef14687cdcc76f4ca0e4339a68cc4532c382a73"}, - {file = "watchdog-5.0.2-py3-none-win_ia64.whl", hash = "sha256:3960136b2b619510569b90f0cd96408591d6c251a75c97690f4553ca88889769"}, - {file = "watchdog-5.0.2.tar.gz", hash = "sha256:dcebf7e475001d2cdeb020be630dc5b687e9acdd60d16fea6bb4508e7b94cf76"}, + {file = "watchdog-5.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:85527b882f3facda0579bce9d743ff7f10c3e1e0db0a0d0e28170a7d0e5ce2ea"}, + {file = "watchdog-5.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:53adf73dcdc0ef04f7735066b4a57a4cd3e49ef135daae41d77395f0b5b692cb"}, + {file = "watchdog-5.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e25adddab85f674acac303cf1f5835951345a56c5f7f582987d266679979c75b"}, + {file = "watchdog-5.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f01f4a3565a387080dc49bdd1fefe4ecc77f894991b88ef927edbfa45eb10818"}, + {file = "watchdog-5.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:91b522adc25614cdeaf91f7897800b82c13b4b8ac68a42ca959f992f6990c490"}, + {file = "watchdog-5.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d52db5beb5e476e6853da2e2d24dbbbed6797b449c8bf7ea118a4ee0d2c9040e"}, + {file = "watchdog-5.0.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:94d11b07c64f63f49876e0ab8042ae034674c8653bfcdaa8c4b32e71cfff87e8"}, + {file = 
"watchdog-5.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:349c9488e1d85d0a58e8cb14222d2c51cbc801ce11ac3936ab4c3af986536926"}, + {file = "watchdog-5.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:53a3f10b62c2d569e260f96e8d966463dec1a50fa4f1b22aec69e3f91025060e"}, + {file = "watchdog-5.0.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:950f531ec6e03696a2414b6308f5c6ff9dab7821a768c9d5788b1314e9a46ca7"}, + {file = "watchdog-5.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae6deb336cba5d71476caa029ceb6e88047fc1dc74b62b7c4012639c0b563906"}, + {file = "watchdog-5.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1021223c08ba8d2d38d71ec1704496471ffd7be42cfb26b87cd5059323a389a1"}, + {file = "watchdog-5.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:752fb40efc7cc8d88ebc332b8f4bcbe2b5cc7e881bccfeb8e25054c00c994ee3"}, + {file = "watchdog-5.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a2e8f3f955d68471fa37b0e3add18500790d129cc7efe89971b8a4cc6fdeb0b2"}, + {file = "watchdog-5.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b8ca4d854adcf480bdfd80f46fdd6fb49f91dd020ae11c89b3a79e19454ec627"}, + {file = "watchdog-5.0.3-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:90a67d7857adb1d985aca232cc9905dd5bc4803ed85cfcdcfcf707e52049eda7"}, + {file = "watchdog-5.0.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:720ef9d3a4f9ca575a780af283c8fd3a0674b307651c1976714745090da5a9e8"}, + {file = "watchdog-5.0.3-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:223160bb359281bb8e31c8f1068bf71a6b16a8ad3d9524ca6f523ac666bb6a1e"}, + {file = "watchdog-5.0.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:560135542c91eaa74247a2e8430cf83c4342b29e8ad4f520ae14f0c8a19cfb5b"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:dd021efa85970bd4824acacbb922066159d0f9e546389a4743d56919b6758b91"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_armv7l.whl", hash = 
"sha256:78864cc8f23dbee55be34cc1494632a7ba30263951b5b2e8fc8286b95845f82c"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_i686.whl", hash = "sha256:1e9679245e3ea6498494b3028b90c7b25dbb2abe65c7d07423ecfc2d6218ff7c"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_ppc64.whl", hash = "sha256:9413384f26b5d050b6978e6fcd0c1e7f0539be7a4f1a885061473c5deaa57221"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:294b7a598974b8e2c6123d19ef15de9abcd282b0fbbdbc4d23dfa812959a9e05"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_s390x.whl", hash = "sha256:26dd201857d702bdf9d78c273cafcab5871dd29343748524695cecffa44a8d97"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:0f9332243355643d567697c3e3fa07330a1d1abf981611654a1f2bf2175612b7"}, + {file = "watchdog-5.0.3-py3-none-win32.whl", hash = "sha256:c66f80ee5b602a9c7ab66e3c9f36026590a0902db3aea414d59a2f55188c1f49"}, + {file = "watchdog-5.0.3-py3-none-win_amd64.whl", hash = "sha256:f00b4cf737f568be9665563347a910f8bdc76f88c2970121c86243c8cfdf90e9"}, + {file = "watchdog-5.0.3-py3-none-win_ia64.whl", hash = "sha256:49f4d36cb315c25ea0d946e018c01bb028048023b9e103d3d3943f58e109dd45"}, + {file = "watchdog-5.0.3.tar.gz", hash = "sha256:108f42a7f0345042a854d4d0ad0834b741d421330d5f575b81cb27b883500176"}, ] [package.extras] @@ -3024,4 +3027,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "58f9c419d055b72320c6442f8e41e53fb5562823d8fcbb00fe9cc801295c4a5e" +content-hash = "66d85f7aabd3fae397f61f071a2f4e5ea0361eba5fe846213e9b5e6bcdc759c7" diff --git a/pyproject.toml b/pyproject.toml index cef61cf0..51c545b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ python = "^3.9" pymongo = "^4.3.3" pyYAML = "^6.0" requests = "^2.28.2" -nmdc-schema = "~10.8" +nmdc-schema = "^11.0.1" deepdiff = "^6.2.1" pytz = "^2023.3" python-dotenv = "^1.0.0" diff --git a/tests/conftest.py b/tests/conftest.py index 0aef261a..b8a89714 
100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,41 +1,77 @@ +import json import os from pymongo import MongoClient from pathlib import Path from pytest import fixture +import requests_mock +import shutil from time import time +from unittest.mock import Mock +from yaml import load, Loader -from nmdc_automation.config import Config +from nmdc_automation.config import SiteConfig +from nmdc_automation.workflow_automation.models import WorkflowConfig +from tests.fixtures import db_utils +from nmdc_automation.workflow_automation.wfutils import WorkflowJob -@fixture +@fixture(scope="session") +def mock_job_state(): + state = db_utils.read_json( + "mags_workflow_state.json" + ) + return state + + +@fixture(scope="session") +def mags_config(fixtures_dir)->WorkflowConfig: + yaml_file = fixtures_dir / "mags_config.yaml" + wf = load(open(yaml_file), Loader) + # normalize the keys from Key Name to key_name + wf = {k.replace(" ", "_").lower(): v for k, v in wf.items()} + return WorkflowConfig(**wf) + + +@fixture(scope="session") def test_db(): conn_str = os.environ.get("MONGO_URL", "mongodb://localhost:27017") return MongoClient(conn_str).test @fixture(autouse=True) -def mock_api(monkeypatch, requests_mock): +def mock_api(monkeypatch, requests_mock, test_data_dir): monkeypatch.setenv("NMDC_API_URL", "http://localhost") monkeypatch.setenv("NMDC_CLIENT_ID", "anid") monkeypatch.setenv("NMDC_CLIENT_SECRET", "asecret") - resp = {"expires": {"minutes": time()+60}, + token_resp = {"expires": {"minutes": time()+60}, "access_token": "abcd" } - requests_mock.post("http://localhost/token", json=resp) + requests_mock.post("http://localhost/token", json=token_resp) resp = ["nmdc:abcd"] - requests_mock.post("http://localhost/pids/mint", json=["nmdc:abcd"]) + requests_mock.post("http://localhost/pids/mint", json=resp) requests_mock.post( - "http://localhost/workflows/activities", - json=["nmdc:abcd"] + "http://localhost/workflows/workflow_executions", + json=resp ) 
requests_mock.post("http://localhost/pids/bind", json=resp) + rqcf = test_data_dir / "rqc_response2.json" + rqc = json.load(open(rqcf)) + rqc_resp = {"resources": [rqc]} + requests_mock.get("http://localhost/jobs", json=rqc_resp) + + requests_mock.patch("http://localhost/operations/nmdc:1234", json={}) + requests_mock.get("http://localhost/operations/nmdc:1234", json={'metadata': {}}) + + @fixture(scope="session") def base_test_dir(): return Path(__file__).parent @fixture(scope="session") def fixtures_dir(base_test_dir): - return base_test_dir / "fixtures" + path = base_test_dir / "fixtures" + # get the absolute path + return path.resolve() @fixture(scope="session") def test_data_dir(base_test_dir): @@ -45,10 +81,79 @@ def test_data_dir(base_test_dir): def workflows_config_dir(base_test_dir): return base_test_dir.parent / "nmdc_automation/config/workflows" + @fixture(scope="session") -def site_config(base_test_dir): +def site_config_file(base_test_dir): return base_test_dir / "site_configuration_test.toml" @fixture(scope="session") -def job_config(site_config): - return Config(site_config) \ No newline at end of file +def site_config(site_config_file): + return SiteConfig(site_config_file) + +@fixture +def initial_state_file(fixtures_dir, tmp_path): + state_file = fixtures_dir / "initial_state.json" + # make a working copy in tmp_path + copied_state_file = tmp_path / "initial_state.json" + shutil.copy(state_file, copied_state_file) + return copied_state_file + + +# Sample Cromwell API responses +CROMWELL_SUCCESS_RESPONSE = { + "id": "cromwell-job-id-12345", + "status": "Succeeded", + "outputs": { + "output_file": "/path/to/output.txt" + } +} + +CROMWELL_FAIL_RESPONSE = { + "id": "cromwell-job-id-54321", + "status": "Failed", + "failures": [ + {"message": "Error processing job"} + ] +} + +JOB_SUBMIT_RESPONSE = { + "id": "cromwell-workflow-id", + "status": "Submitted", + "submission": "2024-10-13T12:34:56.789Z", + "workflowName": "workflow_name", + "workflowRoot": 
"gs://path/to/workflow/root", + "metadataSource": "Unarchived", + "outputs": {}, + "labels": { + "label1": "value1", + "label2": "value2" + }, + "parentWorkflowId": None, + "rootWorkflowId": "cromwell-root-id" +} + +@fixture +def mock_cromwell_api(fixtures_dir): + successful_job_metadata = json.load(open(fixtures_dir / 'cromwell/succeeded_metadata.json')) + with requests_mock.Mocker() as m: + # Mock the Cromwell submit job endpoint + m.post('http://localhost:8088/api/workflows/v1', json=JOB_SUBMIT_RESPONSE, status_code=201) + + # Mock Cromwell status check endpoint + m.get( + 'http://localhost:8088/api/workflows/v1/cromwell-job-id-12345/status', json={ + "id": "cromwell-job-id-12345", + "status": "Succeeded" + } + ) + + # Mock Cromwell failure scenario + m.get('http://localhost:8088/api/workflows/v1/cromwell-job-id-54321/status', json=CROMWELL_FAIL_RESPONSE) + + # Mock Cromwell metadata endpoint + m.get( + 'http://localhost:8088/api/workflows/v1/cromwell-job-id-12345/metadata', + json=successful_job_metadata + ) + + yield m \ No newline at end of file diff --git a/tests/fixtures/cromwell/succeeded_metadata.json b/tests/fixtures/cromwell/succeeded_metadata.json new file mode 100644 index 00000000..f6c4c3e5 --- /dev/null +++ b/tests/fixtures/cromwell/succeeded_metadata.json @@ -0,0 +1,78 @@ +{ + "id": "cromwell-job-id-12345", + "status": "Succeeded", + "start": "2023-09-01T10:00:00.000Z", + "end": "2023-09-01T12:00:00.000Z", + "workflowName": "example_workflow", + "submittedFiles": { + "workflow": "/path/to/workflow.wdl", + "inputs": { + "example_workflow.input_file": "/path/to/input.txt" + }, + "options": "/path/to/options.json", + "labels": { + "project": "example_project", + "version": "1.0" + } + }, + "inputs": { + "example_workflow.input_file": "/path/to/input.txt" + }, + "outputs": { + "nmdc_mags.final_checkm": "./outputs/final_checkm.json" + }, + "calls": { + "example_workflow.task1": [ + { + "shardIndex": -1, + "attempt": 1, + "executionStatus": "Done", + 
"start": "2023-09-01T10:10:00.000Z", + "end": "2023-09-01T10:20:00.000Z", + "stdout": "/path/to/stdout", + "stderr": "/path/to/stderr", + "backendLogs": { + "log": "/path/to/backend_log" + }, + "outputs": { + "task1_output": "/path/to/task1_output.txt" + }, + "backendStatus": "Done", + "runtimeAttributes": { + "cpu": "1", + "memory": "2GB", + "docker": "example_docker_image" + } + } + ], + "example_workflow.task2": [ + { + "shardIndex": -1, + "attempt": 1, + "executionStatus": "Done", + "start": "2023-09-01T10:30:00.000Z", + "end": "2023-09-01T10:40:00.000Z", + "stdout": "/path/to/stdout", + "stderr": "/path/to/stderr", + "backendLogs": { + "log": "/path/to/backend_log" + }, + "outputs": { + "task2_output": "/path/to/task2_output.txt" + }, + "backendStatus": "Done", + "runtimeAttributes": { + "cpu": "2", + "memory": "4GB", + "docker": "example_docker_image" + } + } + ] + }, + "workflowRoot": "/path/to/workflow/root", + "labels": { + "project": "example_project", + "version": "1.0" + }, + "submission": "2023-09-01T09:50:00.000Z" +} diff --git a/tests/fixtures/data_object_set2.json b/tests/fixtures/data_object_set2.json deleted file mode 100644 index 2e855058..00000000 --- a/tests/fixtures/data_object_set2.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "id" : "nmdc:dobj-11-qcstats2", - "name" : "nmdc_wfrqc-11-test001.2_filterStats.txt", - "description" : "Reads QC summary for nmdc:wfrqc-11-metag1.2", - "file_size_bytes" : 123456, - "md5_checksum" : "7172cd332a734e002c88b35827acd991", - "data_object_type" : "QC Statistics", - "url" : "https://data.microbiomedata.org", - "type" : "nmdc:DataObject" -}, -{ - "id" : "nmdc:dobj-11-qcinfo2", - "name" : "nmdc_wfrqc-11-test001.2_readsQC.info", - "description" : "Read filtering info for nmdc:wfrqc-11-metag1.2", - "file_size_bytes" : 123456, - "md5_checksum" : "d3812377eb0a57c9f2bdea5692d157fb", - "data_object_type" : "Read Filtering Info File", - "url" : "https://data.microbiomedata.org", - "type" : "nmdc:DataObject" -}, -{ - 
"id" : "nmdc:dobj-11-filteredreads2", - "name" : "nmdc_wfrqc-11-test001.2_filtered.fastq.gz", - "description" : "Reads QC for nmdc:wfrqc-11-metag1.2", - "file_size_bytes" : 123456, - "md5_checksum" : "fafb41665d8e00654ac0fbf80adc1b87", - "data_object_type" : "Filtered Sequencing Reads", - "url" : "https://data.microbiomedata.org", - "type" : "nmdc:DataObject" -} -] diff --git a/tests/fixtures/db_utils.py b/tests/fixtures/db_utils.py index 6cb30ce0..385e5063 100644 --- a/tests/fixtures/db_utils.py +++ b/tests/fixtures/db_utils.py @@ -9,15 +9,9 @@ FIXTURE_DIR = Path(__file__).parent COLS = [ 'data_object_set', - "omics_processing_set", - 'mags_activity_set', - 'metagenome_assembly_set', - 'metatranscriptome_assembly_set', + 'data_generation_set', 'jobs', - 'metagenome_annotation_activity_set', - 'metatranscriptome_annotation_set', - 'metatranscriptome_expression_analysis_set', - 'read_qc_analysis_activity_set' + 'workflow_execution_set', ] @@ -27,14 +21,18 @@ def read_json(fn): return data -def load_fixture(test_db, fn, col=None, reset=False): +def load_fixture(test_db, fn, col=None, reset=False, version=None): if not col: col = fn.split("/")[-1].split(".")[0] if reset: test_db[col].delete_many({}) - data = read_json(fn) + fixture_path = FIXTURE_DIR / Path('nmdc_db') / Path(fn) + data = json.load(open(fixture_path)) logging.debug("Loading %d recs into %s" % (len(data), col)) if len(data) > 0: + if version: + for d in data: + d['version'] = version test_db[col].insert_many(data) diff --git a/tests/fixtures/failed_job_state.json b/tests/fixtures/failed_job_state.json new file mode 100644 index 00000000..64f84cca --- /dev/null +++ b/tests/fixtures/failed_job_state.json @@ -0,0 +1,216 @@ +{ + "type": "MAGs: v1.3.10", + "cromwell_jobid": "9492a397-eb30-472b-9d3b-abc123456789", + "nmdc_jobid": "nmdc:66cf64b6-7462-11ef-8b84-abc123456789", + "conf": { + "git_repo": "https://github.com/microbiomedata/metaMAGs", + "release": "v1.3.10", + "wdl": "mbin_nmdc.wdl", + 
"activity_id": "nmdc:wfmag-11-g7msr323.1", + "activity_set": "mags_activity_set", + "was_informed_by": "nmdc:omprc-11-9cdxha98", + "trigger_activity": "nmdc:wfmgan-11-jv8kx789.1", + "iteration": 1, + "input_prefix": "nmdc_mags", + "inputs": { + "proj": "nmdc:wfmag-11-g7msr323.1", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "crispr_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "gene_phylogeny_file": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "map_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-1x850k20", + "name": "nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "description": "Assembly contigs (remapped) for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "md5_checksum": "6debed079383eeca2045ce23b0576607", + "file_size_bytes": 2084209623, + "data_object_type": "Assembly Contigs" + }, + { + "id": "nmdc:dobj-11-fkj2kt47", + "name": "nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-9cdxha98", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "md5_checksum": "88ec004bd037a3820060427098798666", + "file_size_bytes": 15704979428, + "data_object_type": "Assembly Coverage BAM" + }, + { + "id": "nmdc:dobj-11-f9rnav80", + "name": "nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "description": "Functional Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "md5_checksum": "349cae9b4fe62bb910f08a183e57b475", + "file_size_bytes": 1320869282, + "data_object_type": "Functional Annotation GFF" + }, + { + "id": "nmdc:dobj-11-btqzf393", + "name": "nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "description": "FASTA Amino Acid File for nmdc:wfmgan-11-jv8kx789.1", + 
"url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "md5_checksum": "292eae73923605dae2ef9f5d582e4603", + "file_size_bytes": 1075716574, + "data_object_type": "Annotation Amino Acid FASTA" + }, + { + "id": "nmdc:dobj-11-hdty3m42", + "name": "nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "description": "COGs for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "md5_checksum": "c4d1121c1ceb1229afb7190d23553003", + "file_size_bytes": 712459544, + "data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF" + }, + { + "id": "nmdc:dobj-11-0gk70187", + "name": "nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "description": "EC Annotations for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "md5_checksum": "84cf22f39532e1bd001bea8425735a82", + "file_size_bytes": 116429630, + "data_object_type": "Annotation Enzyme Commission" + }, + { + "id": "nmdc:dobj-11-3mtmhf26", + "name": "nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "description": "KEGG Orthology for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "md5_checksum": "17d699df17c97fc28796a198cf40a328", + "file_size_bytes": 169182276, + "data_object_type": "Annotation KEGG Orthology" + }, + { + "id": "nmdc:dobj-11-7kfhf682", + "name": "nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "description": "Pfam Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "md5_checksum": "23c33758dc138e1af0f39fa1f3ca07db", + "file_size_bytes": 602929841, + "data_object_type": "Pfam Annotation GFF" + }, + { + "id": 
"nmdc:dobj-11-9hjg8y84", + "name": "nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "description": "TIGRFam for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "md5_checksum": "bbfded219e0b359602725c9efb4f0c54", + "file_size_bytes": 61788991, + "data_object_type": "TIGRFam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-2x0wy902", + "name": "nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "description": "Crispr Terms for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "md5_checksum": "9d2255a63e39552328c4da20ccf2bb3f", + "file_size_bytes": 142989, + "data_object_type": "Crispr Terms" + }, + { + "id": "nmdc:dobj-11-r0bx4g71", + "name": "nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "description": "Product names for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "md5_checksum": "6f1325b2f8dee9b2a75598fb9645c43d", + "file_size_bytes": 401118634, + "data_object_type": "Product Names" + }, + { + "id": "nmdc:dobj-11-7mj15p44", + "name": "nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "description": "Gene Phylogeny for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "md5_checksum": "037aee803f1b81ac5ac1bccb9a18527d", + "file_size_bytes": 748420652, + "data_object_type": "Gene Phylogeny tsv" + }, + { + "id": "nmdc:dobj-11-r2zqpy26", + "name": "nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "description": "Scaffold Lineage tsv for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + 
"md5_checksum": "efdce9771cdda8bd8548e44ef6d1d3a3", + "file_size_bytes": 503898615, + "data_object_type": "Scaffold Lineage tsv" + }, + { + "id": "nmdc:dobj-11-4k2bt072", + "name": "nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "description": "Contig mappings file for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "md5_checksum": "1056a6ef48ce9124de0828ee85246e65", + "file_size_bytes": 250129248, + "data_object_type": "Contig Mapping File" + } + ], + "activity": { + "name": "Metagenome Assembled Genomes Analysis Activity for {id}", + "type": "nmdc:MagsAnalysisActivity", + "binned_contig_num": "{outputs.final_stats_json.binned_contig_num}", + "input_contig_num": "{outputs.final_stats_json.input_contig_num}", + "low_depth_contig_num": "{outputs.final_stats_json.low_depth_contig_num}", + "mags_list": "{outputs.final_stats_json.mags_list}", + "too_short_contig_num": "{outputs.final_stats_json.too_short_contig_num}", + "unbinned_contig_num": "{outputs.final_stats_json.unbinned_contig_num}" + }, + "outputs": [ + { + "output": "final_checkm", + "data_object_type": "CheckM Statistics", + "description": "CheckM for {id}", + "name": "CheckM statistics report", + "id": "nmdc:dobj-11-xvjz5h55" + }, + { + "output": "final_hqmq_bins_zip", + "data_object_type": "Metagenome Bins", + "description": "Metagenome Bins for {id}", + "name": "Metagenome bin tarfiles archive", + "id": "nmdc:dobj-11-85q1v678" + }, + { + "output": "final_gtdbtk_bac_summary", + "data_object_type": "GTDBTK Bacterial Summary", + "description": "Bacterial Summary for {id}", + "name": "GTDBTK bacterial summary", + "id": "nmdc:dobj-11-j5p58211" + }, + { + "output": "final_gtdbtk_ar_summary", + "data_object_type": "GTDBTK Archaeal Summary", + "description": "Archaeal Summary for {id}", + "name": "GTDBTK archaeal summary", + "suffix": "_gtdbtk.ar122.summary.tsv", + "id": 
"nmdc:dobj-11-ec2fqk35" + }, + { + "output": "mags_version", + "data_object_type": "Metagenome Bins Info File", + "description": "Metagenome Bins Info File for {id}", + "name": "Metagenome Bins Info File", + "id": "nmdc:dobj-11-kg68h909" + } + ] + }, + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "last_status": "Failed", + "done": true, + "failed_count": 2, + "start": "2024-09-16T19:33:32.562412+00:00", + "end": "2024-09-16T21:52:12.873101+00:00", + "opid": "nmdc:wfmag-11-g7msr323.1" + } \ No newline at end of file diff --git a/tests/fixtures/initial_state.json b/tests/fixtures/initial_state.json new file mode 100644 index 00000000..aa7dfc62 --- /dev/null +++ b/tests/fixtures/initial_state.json @@ -0,0 +1,220 @@ +{ + "jobs": [ + { + "type": "MAGs: v1.3.10", + "cromwell_jobid": "9492a397-eb30-472b-9d3b-abc123456789", + "nmdc_jobid": "nmdc:66cf64b6-7462-11ef-8b84-abc123456789", + "conf": { + "git_repo": "https://github.com/microbiomedata/metaMAGs", + "release": "v1.3.10", + "wdl": "mbin_nmdc.wdl", + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "activity_set": "mags_activity_set", + "was_informed_by": "nmdc:omprc-11-9cdxha98", + "trigger_activity": "nmdc:wfmgan-11-jv8kx789.1", + "iteration": 1, + "input_prefix": "nmdc_mags", + "inputs": { + "proj": "nmdc:wfmag-11-g7msr323.1", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "cog_file": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "crispr_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "map_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-1x850k20", + "name": "nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "description": "Assembly contigs (remapped) for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "md5_checksum": "6debed079383eeca2045ce23b0576607", + "file_size_bytes": 2084209623, + "data_object_type": "Assembly Contigs" + }, + { + "id": "nmdc:dobj-11-fkj2kt47", + "name": 
"nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-9cdxha98", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "md5_checksum": "88ec004bd037a3820060427098798666", + "file_size_bytes": 15704979428, + "data_object_type": "Assembly Coverage BAM" + }, + { + "id": "nmdc:dobj-11-f9rnav80", + "name": "nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "description": "Functional Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "md5_checksum": "349cae9b4fe62bb910f08a183e57b475", + "file_size_bytes": 1320869282, + "data_object_type": "Functional Annotation GFF" + }, + { + "id": "nmdc:dobj-11-btqzf393", + "name": "nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "description": "FASTA Amino Acid File for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "md5_checksum": "292eae73923605dae2ef9f5d582e4603", + "file_size_bytes": 1075716574, + "data_object_type": "Annotation Amino Acid FASTA" + }, + { + "id": "nmdc:dobj-11-hdty3m42", + "name": "nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "description": "COGs for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "md5_checksum": "c4d1121c1ceb1229afb7190d23553003", + "file_size_bytes": 712459544, + "data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF" + }, + { + "id": "nmdc:dobj-11-0gk70187", + "name": "nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "description": "EC Annotations for nmdc:wfmgan-11-jv8kx789.1", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "md5_checksum": "84cf22f39532e1bd001bea8425735a82", + "file_size_bytes": 116429630, + "data_object_type": "Annotation Enzyme Commission" + }, + { + "id": "nmdc:dobj-11-3mtmhf26", + "name": "nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "description": "KEGG Orthology for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "md5_checksum": "17d699df17c97fc28796a198cf40a328", + "file_size_bytes": 169182276, + "data_object_type": "Annotation KEGG Orthology" + }, + { + "id": "nmdc:dobj-11-7kfhf682", + "name": "nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "description": "Pfam Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "md5_checksum": "23c33758dc138e1af0f39fa1f3ca07db", + "file_size_bytes": 602929841, + "data_object_type": "Pfam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-9hjg8y84", + "name": "nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "description": "TIGRFam for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "md5_checksum": "bbfded219e0b359602725c9efb4f0c54", + "file_size_bytes": 61788991, + "data_object_type": "TIGRFam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-2x0wy902", + "name": "nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "description": "Crispr Terms for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "md5_checksum": "9d2255a63e39552328c4da20ccf2bb3f", + "file_size_bytes": 142989, + "data_object_type": "Crispr Terms" + }, + { + "id": "nmdc:dobj-11-r0bx4g71", + "name": 
"nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "description": "Product names for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "md5_checksum": "6f1325b2f8dee9b2a75598fb9645c43d", + "file_size_bytes": 401118634, + "data_object_type": "Product Names" + }, + { + "id": "nmdc:dobj-11-7mj15p44", + "name": "nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "description": "Gene Phylogeny for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "md5_checksum": "037aee803f1b81ac5ac1bccb9a18527d", + "file_size_bytes": 748420652, + "data_object_type": "Gene Phylogeny tsv" + }, + { + "id": "nmdc:dobj-11-r2zqpy26", + "name": "nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "description": "Scaffold Lineage tsv for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "md5_checksum": "efdce9771cdda8bd8548e44ef6d1d3a3", + "file_size_bytes": 503898615, + "data_object_type": "Scaffold Lineage tsv" + }, + { + "id": "nmdc:dobj-11-4k2bt072", + "name": "nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "description": "Contig mappings file for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "md5_checksum": "1056a6ef48ce9124de0828ee85246e65", + "file_size_bytes": 250129248, + "data_object_type": "Contig Mapping File" + } + ], + "activity": { + "name": "Metagenome Assembled Genomes Analysis Activity for {id}", + "type": "nmdc:MagsAnalysisActivity", + "binned_contig_num": "{outputs.final_stats_json.binned_contig_num}", + "input_contig_num": "{outputs.final_stats_json.input_contig_num}", + 
"low_depth_contig_num": "{outputs.final_stats_json.low_depth_contig_num}", + "mags_list": "{outputs.final_stats_json.mags_list}", + "too_short_contig_num": "{outputs.final_stats_json.too_short_contig_num}", + "unbinned_contig_num": "{outputs.final_stats_json.unbinned_contig_num}" + }, + "outputs": [ + { + "output": "final_checkm", + "data_object_type": "CheckM Statistics", + "description": "CheckM for {id}", + "name": "CheckM statistics report", + "id": "nmdc:dobj-11-xvjz5h55" + }, + { + "output": "final_hqmq_bins_zip", + "data_object_type": "Metagenome Bins", + "description": "Metagenome Bins for {id}", + "name": "Metagenome bin tarfiles archive", + "id": "nmdc:dobj-11-85q1v678" + }, + { + "output": "final_gtdbtk_bac_summary", + "data_object_type": "GTDBTK Bacterial Summary", + "description": "Bacterial Summary for {id}", + "name": "GTDBTK bacterial summary", + "id": "nmdc:dobj-11-j5p58211" + }, + { + "output": "final_gtdbtk_ar_summary", + "data_object_type": "GTDBTK Archaeal Summary", + "description": "Archaeal Summary for {id}", + "name": "GTDBTK archaeal summary", + "suffix": "_gtdbtk.ar122.summary.tsv", + "id": "nmdc:dobj-11-ec2fqk35" + }, + { + "output": "mags_version", + "data_object_type": "Metagenome Bins Info File", + "description": "Metagenome Bins Info File for {id}", + "name": "Metagenome Bins Info File", + "id": "nmdc:dobj-11-kg68h909" + } + ] + }, + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "last_status": "Failed", + "done": false, + "failed_count": 1, + "start": "2024-09-16T19:33:32.562412+00:00", + "end": "2024-09-16T21:52:12.873101+00:00", + "opid": "nmdc:test-opid" + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/jobs.json b/tests/fixtures/jobs.json deleted file mode 100644 index fe51488c..00000000 --- a/tests/fixtures/jobs.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/tests/fixtures/jobs_api_response.json b/tests/fixtures/jobs_api_response.json new file mode 100644 index 00000000..b2d2dab9 --- /dev/null +++ 
b/tests/fixtures/jobs_api_response.json @@ -0,0 +1,579 @@ +{ + "resources": [ + { + "workflow": { + "id": "Metagenome Annotation: v1.1.0" + }, + "id": "nmdc:0003398c-48a8-11ef-bcec-52b18d4509d1", + "created_at": "2024-07-23T03:59:46", + "config": { + "git_repo": "https://github.com/microbiomedata/mg_annotation", + "release": "v1.1.0", + "wdl": "annotation_full.wdl", + "activity_id": "nmdc:wfmgan-11-fp07wg93.2", + "activity_set": "metagenome_annotation_activity_set", + "was_informed_by": "nmdc:omprc-11-dfzknb42", + "trigger_activity": "nmdc:wfmgas-11-tekfqa46.1", + "iteration": 2, + "input_prefix": "annotation", + "inputs": { + "input_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-dfzknb42/nmdc:wfmgas-11-tekfqa46.1/nmdc_wfmgas-11-tekfqa46.1_contigs.fna", + "imgap_project_id": "scaffold", + "proj": "nmdc:wfmgan-11-fp07wg93.2" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-g7y3gy61", + "name": "Final assembly contigs fasta", + "description": "Assembly contigs for nmdc:wfmgas-11-tekfqa46.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dfzknb42/nmdc:wfmgas-11-tekfqa46.1/nmdc_wfmgas-11-tekfqa46.1_contigs.fna", + "md5_checksum": "89b0300d904b16e46e9d749cf633a911", + "file_size_bytes": 714986508, + "data_object_type": "Assembly Contigs" + } + ], + "activity": { + "name": "Metagenome Annotation Analysis Activity for {id}", + "type": "nmdc:MetagenomeAnnotationActivity" + }, + "outputs": [ + { + "output": "proteins_faa", + "data_object_type": "Annotation Amino Acid FASTA", + "description": "FASTA Amino Acid File for {id}", + "name": "FASTA amino acid file for annotated proteins", + "id": "nmdc:dobj-11-4akzce87" + }, + { + "output": "structural_gff", + "data_object_type": "Structural Annotation GFF", + "description": "Structural Annotation for {id}", + "name": "GFF3 format file with structural annotations", + "id": "nmdc:dobj-11-ek6tfd02" + }, + { + "output": "functional_gff", + "data_object_type": "Functional Annotation GFF", + 
"description": "Functional Annotation for {id}", + "name": "GFF3 format file with functional annotations", + "id": "nmdc:dobj-11-bm1qkk71" + }, + { + "output": "ko_tsv", + "data_object_type": "Annotation KEGG Orthology", + "description": "KEGG Orthology for {id}", + "name": "Tab delimited file for KO annotation", + "id": "nmdc:dobj-11-8h1tb446" + }, + { + "output": "ec_tsv", + "data_object_type": "Annotation Enzyme Commission", + "description": "EC Annotations for {id}", + "name": "Tab delimited file for EC annotation", + "suffix": "_ec.tsv", + "id": "nmdc:dobj-11-b7y5vv18" + }, + { + "output": "lineage_tsv", + "data_object_type": "Scaffold Lineage tsv", + "description": "Scaffold Lineage tsv for {id}", + "name": "Phylogeny at the scaffold level", + "suffix": "_scaffold_lineage.tsv", + "id": "nmdc:dobj-11-fn3ba237" + }, + { + "output": "cog_gff", + "data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF", + "description": "COGs for {id}", + "name": "GFF3 format file with COGs", + "id": "nmdc:dobj-11-6q7fh110" + }, + { + "output": "pfam_gff", + "data_object_type": "Pfam Annotation GFF", + "description": "Pfam Annotation for {id}", + "name": "GFF3 format file with Pfam", + "id": "nmdc:dobj-11-d85sqw71" + }, + { + "output": "tigrfam_gff", + "data_object_type": "TIGRFam Annotation GFF", + "description": "TIGRFam for {id}", + "name": "GFF3 format file with TIGRfam", + "id": "nmdc:dobj-11-fjq3q643" + }, + { + "output": "smart_gff", + "data_object_type": "SMART Annotation GFF", + "description": "SMART Annotations for {id}", + "name": "GFF3 format file with SMART", + "id": "nmdc:dobj-11-dhfdb719" + }, + { + "output": "supfam_gff", + "data_object_type": "SUPERFam Annotation GFF", + "description": "SUPERFam Annotations for {id}", + "name": "GFF3 format file with SUPERFam", + "id": "nmdc:dobj-11-qfmw4879" + }, + { + "output": "cath_funfam_gff", + "data_object_type": "CATH FunFams (Functional Families) Annotation GFF", + "description": "CATH FunFams for {id}", 
+ "name": "GFF3 format file with CATH FunFams", + "id": "nmdc:dobj-11-ssa39z59" + }, + { + "output": "crt_gff", + "data_object_type": "CRT Annotation GFF", + "description": "CRT Annotations for {id}", + "name": "GFF3 format file with CRT", + "id": "nmdc:dobj-11-4hsvsv34" + }, + { + "output": "genemark_gff", + "data_object_type": "Genemark Annotation GFF", + "description": "Genemark Annotations for {id}", + "name": "GFF3 format file with Genemark", + "id": "nmdc:dobj-11-saaeyc80" + }, + { + "output": "prodigal_gff", + "data_object_type": "Prodigal Annotation GFF", + "description": "Prodigal Annotations {id}", + "name": "GFF3 format file with Prodigal", + "id": "nmdc:dobj-11-2kastj13" + }, + { + "output": "trna_gff", + "data_object_type": "TRNA Annotation GFF", + "description": "TRNA Annotations {id}", + "name": "GFF3 format file with TRNA", + "id": "nmdc:dobj-11-240tpb83" + }, + { + "output": "final_rfam_gff", + "data_object_type": "RFAM Annotation GFF", + "description": "RFAM Annotations for {id}", + "name": "GFF3 format file with RFAM", + "id": "nmdc:dobj-11-vn35b118" + }, + { + "output": "ko_ec_gff", + "data_object_type": "KO_EC Annotation GFF", + "description": "KO_EC Annotations for {id}", + "name": "GFF3 format file with KO_EC", + "id": "nmdc:dobj-11-20eag911" + }, + { + "output": "product_names_tsv", + "data_object_type": "Product Names", + "description": "Product names for {id}", + "name": "Product names file", + "id": "nmdc:dobj-11-bnjjvh49" + }, + { + "output": "gene_phylogeny_tsv", + "data_object_type": "Gene Phylogeny tsv", + "description": "Gene Phylogeny for {id}", + "name": "Gene Phylogeny file", + "id": "nmdc:dobj-11-6hgrts53" + }, + { + "output": "crt_crisprs", + "data_object_type": "Crispr Terms", + "description": "Crispr Terms for {id}", + "name": "Crispr Terms", + "id": "nmdc:dobj-11-mghmgg98" + }, + { + "output": "stats_tsv", + "data_object_type": "Annotation Statistics", + "description": "Annotation Stats for {id}", + "name": "Annotation 
statistics report", + "id": "nmdc:dobj-11-x2bvpj13" + }, + { + "output": "renamed_fasta", + "name": "Renamed assembly contigs fasta", + "data_object_type": "Assembly Contigs", + "description": "Assembly contigs (remapped) for {id}", + "id": "nmdc:dobj-11-hwd2k772" + }, + { + "output": "map_file", + "data_object_type": "Contig Mapping File", + "description": "Contig mappings file for {id}", + "name": "Contig mappings between contigs and scaffolds", + "suffix": "_contig_names_mapping.tsv", + "optional": true, + "id": "nmdc:dobj-11-6r54q802" + }, + { + "output": "imgap_version", + "data_object_type": "Annotation Info File", + "description": "Annotation info for {id}", + "name": "File containing annotation info", + "id": "nmdc:dobj-11-4mr0ae56" + } + ] + }, + "claims": [ + { + "op_id": "nmdc:sys0egpxjn25", + "site_id": "NERSC" + } + ] + }, + { + "workflow": { + "id": "Metagenome Assembly: v1.0.3" + }, + "id": "nmdc:00044f52-833c-11ee-bd0f-067aae39013b", + "created_at": "2023-11-14T22:20:21", + "config": { + "git_repo": "https://github.com/microbiomedata/metaAssembly", + "release": "v1.0.3", + "wdl": "jgi_assembly.wdl", + "activity_id": "nmdc:wfmgas-11-wtz4rz76.1", + "activity_set": "metagenome_assembly_set", + "was_informed_by": "nmdc:omprc-12-5vn1nh02", + "trigger_activity": "nmdc:wfrqc-11-4eethb84.1", + "iteration": 1, + "input_prefix": "jgi_metaASM", + "inputs": { + "input_file": "https://data.microbiomedata.org/data/nmdc:omprc-12-5vn1nh02/nmdc:wfrqc-11-4eethb84.1/nmdc_wfrqc-11-4eethb84.1_filtered.fastq.gz", + "rename_contig_prefix": "nmdc:wfmgas-11-wtz4rz76.1", + "proj": "nmdc:wfmgas-11-wtz4rz76.1" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-mh9dma28", + "name": "nmdc_wfrqc-11-4eethb84.1_filtered.fastq.gz", + "description": "Reads QC for nmdc:wfrqc-11-4eethb84.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-12-5vn1nh02/nmdc:wfrqc-11-4eethb84.1/nmdc_wfrqc-11-4eethb84.1_filtered.fastq.gz", + "md5_checksum": 
"3e60e2633256727a38a3c3b368a01732", + "file_size_bytes": 2218610459, + "data_object_type": "Filtered Sequencing Reads" + } + ], + "activity": { + "name": "Metagenome Assembly Activity for {id}", + "type": "nmdc:MetagenomeAssembly", + "asm_score": "{outputs.stats.asm_score}", + "contig_bp": "{outputs.stats.contig_bp}", + "contigs": "{outputs.stats.contigs}", + "ctg_l50": "{outputs.stats.ctg_l50}", + "ctg_l90": "{outputs.stats.ctg_l90}", + "ctg_logsum": "{outputs.stats.ctg_logsum}", + "ctg_max": "{outputs.stats.ctg_max}", + "ctg_n50": "{outputs.stats.ctg_n50}", + "ctg_n90": "{outputs.stats.ctg_n90}", + "ctg_powsum": "{outputs.stats.ctg_powsum}", + "gap_pct": "{outputs.stats.gap_pct}", + "gc_avg": "{outputs.stats.gc_avg}", + "gc_std": "{outputs.stats.gc_std}", + "scaf_bp": "{outputs.stats.scaf_bp}", + "scaf_l50": "{outputs.stats.scaf_l50}", + "scaf_l90": "{outputs.stats.scaf_l90}", + "scaf_l_gt50k": "{outputs.stats.scaf_l_gt50k}", + "scaf_logsum": "{outputs.stats.scaf_logsum}", + "scaf_max": "{outputs.stats.scaf_max}", + "scaf_n50": "{outputs.stats.scaf_n50}", + "scaf_n90": "{outputs.stats.scaf_n90}", + "scaf_n_gt50k": "{outputs.stats.scaf_n_gt50k}", + "scaf_pct_gt50k": "{outputs.stats.scaf_pct_gt50k}", + "scaf_powsum": "{outputs.stats.scaf_powsum}", + "scaffolds": "{outputs.stats.scaffolds}" + }, + "outputs": [ + { + "output": "contig", + "name": "Final assembly contigs fasta", + "suffix": "_contigs.fna", + "data_object_type": "Assembly Contigs", + "description": "Assembly contigs for {id}", + "id": "nmdc:dobj-11-xgrxb861" + }, + { + "output": "scaffold", + "name": "Final assembly scaffolds fasta", + "suffix": "_scaffolds.fna", + "data_object_type": "Assembly Scaffolds", + "description": "Assembly scaffolds for {id}", + "id": "nmdc:dobj-11-s2dd2t90" + }, + { + "output": "covstats", + "name": "Assembled contigs coverage information", + "suffix": "_covstats.txt", + "data_object_type": "Assembly Coverage Stats", + "description": "Coverage Stats for {id}", + "id": 
"nmdc:dobj-11-3bkdmw35" + }, + { + "output": "agp", + "name": "An AGP format file that describes the assembly", + "suffix": "_assembly.agp", + "data_object_type": "Assembly AGP", + "description": "AGP for {id}", + "id": "nmdc:dobj-11-pyrthm29" + }, + { + "output": "bam", + "name": "Sorted bam file of reads mapping back to the final assembly", + "suffix": "_pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM", + "description": "Sorted Bam for {id}", + "id": "nmdc:dobj-11-f5b4wy79" + }, + { + "output": "asminfo", + "name": "File containing assembly info", + "suffix": "_metaAsm.info", + "data_object_type": "Assembly Info File", + "description": "Assembly info for {id}", + "id": "nmdc:dobj-11-z9hckx95" + } + ] + }, + "claims": [ + { + "op_id": "nmdc:sys0b9ktag76", + "site_id": "NERSC" + } + ] + }, + { + "workflow": { + "id": "Metagenome Annotation: v1.0.4" + }, + "id": "nmdc:005782ec-4081-11ee-9be4-ee3fb66564cb", + "created_at": "2023-08-22T00:15:29", + "config": { + "git_repo": "https://github.com/microbiomedata/mg_annotation", + "release": "v1.0.4", + "wdl": "annotation_full.wdl", + "activity_id": "nmdc:wfmgan-11-5b6pg295.1", + "activity_set": "metagenome_annotation_activity_set", + "was_informed_by": "nmdc:omprc-11-dcd0jq29", + "trigger_activity": "nmdc:wfmgas-11-hdxbp548.1", + "iteration": 1, + "input_prefix": "annotation", + "inputs": { + "input_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-dcd0jq29/nmdc:wfmgas-11-hdxbp548.1/nmdc_wfmgas-11-hdxbp548.1_contigs.fna", + "imgap_project_id": "scaffold", + "proj": "nmdc:wfmgan-11-5b6pg295.1" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-598qwk38", + "name": "nmdc_wfmgas-11-hdxbp548.1_contigs.fna", + "description": "Assembly contigs for nmdc:wfmgas-11-hdxbp548.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dcd0jq29/nmdc:wfmgas-11-hdxbp548.1/nmdc_wfmgas-11-hdxbp548.1_contigs.fna", + "md5_checksum": "4e6d4fcc9f330f6a616ddb9595ef9509", + "file_size_bytes": 
18918050, + "data_object_type": "Assembly Contigs" + } + ], + "activity": { + "name": "Metagenome Annotation Analysis Activity for {id}", + "type": "nmdc:MetagenomeAnnotationActivity" + }, + "outputs": [ + { + "output": "proteins_faa", + "data_object_type": "Annotation Amino Acid FASTA", + "description": "FASTA Amino Acid File for {id}", + "name": "FASTA amino acid file for annotated proteins", + "suffix": "_proteins.faa", + "id": "nmdc:dobj-11-a1r5a733" + }, + { + "output": "structural_gff", + "data_object_type": "Structural Annotation GFF", + "description": "Structural Annotation for {id}", + "name": "GFF3 format file with structural annotations", + "suffix": "_structural_annotation.gff", + "id": "nmdc:dobj-11-tqtycq13" + }, + { + "output": "functional_gff", + "data_object_type": "Functional Annotation GFF", + "description": "Functional Annotation for {id}", + "name": "GFF3 format file with functional annotations", + "suffix": "_functional_annotation.gff", + "id": "nmdc:dobj-11-f97mze63" + }, + { + "output": "ko_tsv", + "data_object_type": "Annotation KEGG Orthology", + "description": "KEGG Orthology for {id}", + "name": "Tab delimited file for KO annotation", + "suffix": "_ko.tsv", + "id": "nmdc:dobj-11-3tgvxd08" + }, + { + "output": "ec_tsv", + "data_object_type": "Annotation Enzyme Commission", + "description": "EC Annotations for {id}", + "name": "Tab delimited file for EC annotation", + "suffix": "_ec.tsv", + "id": "nmdc:dobj-11-zxcnx432" + }, + { + "output": "lineage_tsv", + "data_object_type": "Scaffold Lineage tsv", + "description": "Scaffold Lineage tsv for {id}", + "name": "Phylogeny at the scaffold level", + "suffix": "_scaffold_lineage.tsv", + "id": "nmdc:dobj-11-zj305709" + }, + { + "output": "cog_gff", + "data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF", + "description": "COGs for {id}", + "name": "GFF3 format file with COGs", + "suffix": "_cog.gff", + "id": "nmdc:dobj-11-5fxvrv97" + }, + { + "output": "pfam_gff", + 
"data_object_type": "Pfam Annotation GFF", + "description": "Pfam Annotation for {id}", + "name": "GFF3 format file with Pfam", + "suffix": "_pfam.gff", + "id": "nmdc:dobj-11-9z6ah352" + }, + { + "output": "tigrfam_gff", + "data_object_type": "TIGRFam Annotation GFF", + "description": "TIGRFam for {id}", + "name": "GFF3 format file with TIGRfam", + "suffix": "_tigrfam.gff", + "id": "nmdc:dobj-11-yhfecr06" + }, + { + "output": "smart_gff", + "data_object_type": "SMART Annotation GFF", + "description": "SMART Annotations for {id}", + "name": "GFF3 format file with SMART", + "suffix": "_smart.gff", + "id": "nmdc:dobj-11-9gp4da96" + }, + { + "output": "supfam_gff", + "data_object_type": "SUPERFam Annotation GFF", + "description": "SUPERFam Annotations for {id}", + "name": "GFF3 format file with SUPERFam", + "suffix": "_supfam.gff", + "id": "nmdc:dobj-11-b54ak435" + }, + { + "output": "cath_funfam_gff", + "data_object_type": "CATH FunFams (Functional Families) Annotation GFF", + "description": "CATH FunFams for {id}", + "name": "GFF3 format file with CATH FunFams", + "suffix": "_cath_funfam.gff", + "id": "nmdc:dobj-11-r3b6fh65" + }, + { + "output": "crt_gff", + "data_object_type": "CRT Annotation GFF", + "description": "CRT Annotations for {id}", + "name": "GFF3 format file with CRT", + "suffix": "_crt.gff", + "id": "nmdc:dobj-11-s1770x66" + }, + { + "output": "genemark_gff", + "data_object_type": "Genemark Annotation GFF", + "description": "Genemark Annotations for {id}", + "name": "GFF3 format file with Genemark", + "suffix": "_genemark.gff", + "id": "nmdc:dobj-11-tfskm895" + }, + { + "output": "prodigal_gff", + "data_object_type": "Prodigal Annotation GFF", + "description": "Prodigal Annotations {id}", + "name": "GFF3 format file with Prodigal", + "suffix": "_prodigal.gff", + "id": "nmdc:dobj-11-0vt22n49" + }, + { + "output": "trna_gff", + "data_object_type": "TRNA Annotation GFF", + "description": "TRNA Annotations {id}", + "name": "GFF3 format file with TRNA", + 
"suffix": "_trna.gff", + "id": "nmdc:dobj-11-g1z3e990" + }, + { + "output": "final_rfam_gff", + "data_object_type": "RFAM Annotation GFF", + "description": "RFAM Annotations for {id}", + "name": "GFF3 format file with RFAM", + "suffix": "_rfam.gff", + "id": "nmdc:dobj-11-dpz65681" + }, + { + "output": "ko_ec_gff", + "data_object_type": "KO_EC Annotation GFF", + "description": "KO_EC Annotations for {id}", + "name": "GFF3 format file with KO_EC", + "suffix": "_ko_ec.gff", + "id": "nmdc:dobj-11-s64gp211" + }, + { + "output": "product_names_tsv", + "data_object_type": "Product Names", + "description": "Product names for {id}", + "name": "Product names file", + "suffix": "_product_names.tsv", + "id": "nmdc:dobj-11-7dfvzs81" + }, + { + "output": "gene_phylogeny_tsv", + "data_object_type": "Gene Phylogeny tsv", + "description": "Gene Phylogeny for {id}", + "name": "Gene Phylogeny file", + "suffix": "_gene_phylogeny.tsv", + "id": "nmdc:dobj-11-hagw0713" + }, + { + "output": "crt_crisprs", + "data_object_type": "Crispr Terms", + "description": "Crispr Terms for {id}", + "name": "Crispr Terms", + "suffix": "_crt.crisprs", + "id": "nmdc:dobj-11-nsnye718" + }, + { + "output": "stats_tsv", + "data_object_type": "Annotation Statistics", + "description": "Annotation Stats for {id}", + "name": "Annotation statistics report", + "suffix": "_stats.tsv", + "id": "nmdc:dobj-11-naqp2149" + }, + { + "output": "imgap_version", + "data_object_type": "Annotation Info File", + "description": "Annotation info for {id}", + "name": "File containing annotation info", + "suffix": "_imgap.info", + "id": "nmdc:dobj-11-z7vpmz16" + } + ] + }, + "claims": [ + { + "op_id": "nmdc:sys0dxyztg13", + "site_id": "NERSC" + } + ] + } + ], + "next_page_token": "nmdc:sys0c1zcq972" +} \ No newline at end of file diff --git a/tests/fixtures/mags_activity_set.json b/tests/fixtures/mags_activity_set.json deleted file mode 100644 index ecf52a32..00000000 --- a/tests/fixtures/mags_activity_set.json +++ /dev/null @@ 
-1,45 +0,0 @@ -[ - { - "has_input": [ - "nmdc:dobj-11-contigs", - "nmdc:dobj-11-functional", - "nmdc:dobj-11-funfam", - "nmdc:dobj-11-superfam", - "nmdc:dobj-11-cogs", - "nmdc:dobj-11-pfam", - "nmdc:dobj-11-productnames", - "nmdc:dobj-11-tigrfam", - "nmdc:dobj-11-ec", - "nmdc:dobj-11-kegg", - "nmdc:dobj-11-scaffoldlineage", - "nmdc:dobj-11-coverbam", - "nmdc:dobj-11-smart", - "nmdc:dobj-11-proteinsfaa", - "nmdc:dobj-11-contigmapping", - "nmdc:dobj-11-genephylo" - ], - "part_of": [ - "nmdc:omprc-11-metag1" - ], - "git_url": "https://github.com/microbiomedata/metaMAGs", - "version": "v1.1.0", - "has_output": [ - "nmdc:dobj-11-bacsummary", - "nmdc:dobj-11-lqbin", - "nmdc:dobj-11-hqmqbin", - "nmdc:dobj-11-arsummary", - "nmdc:dobj-11-checkm", - "nmdc:dobj-11-bininfo", - "nmdc:dobj-11-heatmap", - "nmdc:dobj-11-krona", - "nmdc:dobj-11-barplot" - ], - "was_informed_by": "nmdc:omprc-11-metag1", - "id": "nmdc:wfmags-11-metag1.1", - "execution_resource": "NERSC-Cori", - "name": "MAGs Analysis Activity for nmdc:omprc-11-metag1", - "started_at_time": "2021-08-05T14:48:51+00:00", - "type": "nmdc:MAGsAnalysisActivity", - "ended_at_time": "2021-09-15T10:13:20+00:00" - } -] diff --git a/tests/fixtures/mags_config.yaml b/tests/fixtures/mags_config.yaml new file mode 100644 index 00000000..f9bf02fb --- /dev/null +++ b/tests/fixtures/mags_config.yaml @@ -0,0 +1,76 @@ + - Name: MAGs + Type: nmdc:MagsAnalysis + Enabled: True + Analyte Category: Metagenome + Git_repo: https://github.com/microbiomedata/metaMAGs + Version: v1.3.10 + WDL: mbin_nmdc.wdl + Collection: workflow_execution_set + Predecessors: + - Metagenome Annotation + Input_prefix: nmdc_mags + Inputs: + proj: "{workflow_execution_id}" + contig_file: do:Assembly Contigs + sam_file: do:Assembly Coverage BAM + gff_file: do:Functional Annotation GFF + proteins_file: do:Annotation Amino Acid FASTA + cog_file: do:Clusters of Orthologous Groups (COG) Annotation GFF + ec_file: do:Annotation Enzyme Commission + ko_file: do:Annotation 
KEGG Orthology + pfam_file: do:Pfam Annotation GFF + tigrfam_file: do:TIGRFam Annotation GFF + crispr_file: do:Crispr Terms + product_names_file: do:Product Names + gene_phylogeny_file: do:Gene Phylogeny tsv + lineage_file: do:Scaffold Lineage tsv + map_file: do:Contig Mapping File + Optional Inputs: + - map_file + Workflow Execution: + name: "Metagenome Assembled Genomes Analysis for {id}" + type: nmdc:MagsAnalysis + binned_contig_num: "{outputs.final_stats_json.binned_contig_num}" + input_contig_num: "{outputs.final_stats_json.input_contig_num}" + low_depth_contig_num: "{outputs.final_stats_json.low_depth_contig_num}" + mags_list: "{outputs.final_stats_json.mags_list}" + too_short_contig_num: "{outputs.final_stats_json.too_short_contig_num}" + unbinned_contig_num: "{outputs.final_stats_json.unbinned_contig_num}" + Outputs: + - output: final_checkm + data_object_type: CheckM Statistics + description: CheckM for {id} + name: CheckM statistics report + - output: final_hqmq_bins_zip + data_object_type: Metagenome HQMQ Bins Compression File + description: Metagenome HQMQ Bins for {id} + name: Metagenome hqmq bin zip archive + - output: final_gtdbtk_bac_summary + data_object_type: GTDBTK Bacterial Summary + description: Bacterial Summary for {id} + name: GTDBTK bacterial summary + - output: final_gtdbtk_ar_summary + data_object_type: GTDBTK Archaeal Summary + description: Archaeal Summary for {id} + name: GTDBTK archaeal summary + suffix: _gtdbtk.ar122.summary.tsv + - output: mags_version + data_object_type: Metagenome Bins Info File + description: Metagenome Bins Info File for {id} + name: Metagenome Bins Info File + - output: final_lq_bins_zip + data_object_type: Metagenome LQ Bins Compression File + description: Metagenome LQ Bins for {id} + name: Metagenome lq bin zip archive + - output: heatmap + data_object_type: Metagenome Bins Heatmap + description: Metagenome heatmap for {id} + name: Metagenome Heatmap File + - output: barplot + data_object_type: Metagenome 
Bins Barplot + description: Metagenome barplot for {id} + name: Metagenome Barplot File + - output: kronaplot + data_object_type: Metagenome Bins Krona Plot + description: Metagenome Bins Krona Plot for {id} + name: Metagenome Krona Bins Plot File \ No newline at end of file diff --git a/tests/fixtures/mags_final_stats.json b/tests/fixtures/mags_final_stats.json new file mode 100644 index 00000000..2c1e53f3 --- /dev/null +++ b/tests/fixtures/mags_final_stats.json @@ -0,0 +1,189 @@ +{ + "input_contig_num": 2273412, + "too_short_contig_num": 2005162, + "low_depth_contig_num": 0, + "unbinned_contig_num": 241036, + "binned_contig_num": 27214, + "mags_list": [ + { + "bin_name": "bins.40", + "number_of_contig": 44, + "completeness": 97.3, + "contamination": 3.38, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "MQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "Bacteria", + "gtdbtk_phylum": "Verrucomicrobiota", + "gtdbtk_class": "Verrucomicrobiae", + "gtdbtk_order": "Pedosphaerales", + "gtdbtk_family": "UBA11358", + "gtdbtk_genus": "UBA11358", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7_c1", + "nmdc:wfmgas-13-56028x05.1_9_c1", + "nmdc:wfmgas-13-56028x05.1_16_c1", + "nmdc:wfmgas-13-56028x05.1_20_c1", + "nmdc:wfmgas-13-56028x05.1_23_c1", + "nmdc:wfmgas-13-56028x05.1_27_c1", + "nmdc:wfmgas-13-56028x05.1_45_c1", + "nmdc:wfmgas-13-56028x05.1_55_c1", + "nmdc:wfmgas-13-56028x05.1_71_c1", + "nmdc:wfmgas-13-56028x05.1_79_c1", + "nmdc:wfmgas-13-56028x05.1_99_c1", + "nmdc:wfmgas-13-56028x05.1_52_c2", + "nmdc:wfmgas-13-56028x05.1_127_c1", + "nmdc:wfmgas-13-56028x05.1_131_c1", + "nmdc:wfmgas-13-56028x05.1_137_c1", + "nmdc:wfmgas-13-56028x05.1_169_c1", + "nmdc:wfmgas-13-56028x05.1_200_c1", + "nmdc:wfmgas-13-56028x05.1_212_c1", + "nmdc:wfmgas-13-56028x05.1_223_c1", + "nmdc:wfmgas-13-56028x05.1_372_c1", + "nmdc:wfmgas-13-56028x05.1_393_c1", + "nmdc:wfmgas-13-56028x05.1_428_c1", + 
"nmdc:wfmgas-13-56028x05.1_52_c1", + "nmdc:wfmgas-13-56028x05.1_582_c1", + "nmdc:wfmgas-13-56028x05.1_706_c1", + "nmdc:wfmgas-13-56028x05.1_888_c1", + "nmdc:wfmgas-13-56028x05.1_912_c1", + "nmdc:wfmgas-13-56028x05.1_1268_c1", + "nmdc:wfmgas-13-56028x05.1_1271_c1", + "nmdc:wfmgas-13-56028x05.1_1492_c1", + "nmdc:wfmgas-13-56028x05.1_1494_c1", + "nmdc:wfmgas-13-56028x05.1_1604_c1", + "nmdc:wfmgas-13-56028x05.1_1627_c1", + "nmdc:wfmgas-13-56028x05.1_1888_c1", + "nmdc:wfmgas-13-56028x05.1_1938_c1", + "nmdc:wfmgas-13-56028x05.1_2944_c1", + "nmdc:wfmgas-13-56028x05.1_3261_c1", + "nmdc:wfmgas-13-56028x05.1_3477_c1", + "nmdc:wfmgas-13-56028x05.1_4194_c1", + "nmdc:wfmgas-13-56028x05.1_6257_c1", + "nmdc:wfmgas-13-56028x05.1_7589_c1", + "nmdc:wfmgas-13-56028x05.1_10469_c1", + "nmdc:wfmgas-13-56028x05.1_10553_c1", + "nmdc:wfmgas-13-56028x05.1_13792_c1" + ] + }, + { + "bin_name": "bins.9", + "number_of_contig": 92, + "completeness": 0.0, + "contamination": 0.0, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "LQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "null", + "gtdbtk_phylum": "null", + "gtdbtk_class": "null", + "gtdbtk_order": "null", + "gtdbtk_family": "null", + "gtdbtk_genus": "null", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7094_c1", + "nmdc:wfmgas-13-56028x05.1_9486_c1", + "nmdc:wfmgas-13-56028x05.1_9853_c1", + "nmdc:wfmgas-13-56028x05.1_10857_c1", + "nmdc:wfmgas-13-56028x05.1_11702_c1", + "nmdc:wfmgas-13-56028x05.1_12042_c1", + "nmdc:wfmgas-13-56028x05.1_14174_c1", + "nmdc:wfmgas-13-56028x05.1_14597_c1", + "nmdc:wfmgas-13-56028x05.1_16115_c1", + "nmdc:wfmgas-13-56028x05.1_16261_c1", + "nmdc:wfmgas-13-56028x05.1_16795_c1", + "nmdc:wfmgas-13-56028x05.1_16943_c1", + "nmdc:wfmgas-13-56028x05.1_17208_c1", + "nmdc:wfmgas-13-56028x05.1_17245_c1", + "nmdc:wfmgas-13-56028x05.1_17383_c1", + "nmdc:wfmgas-13-56028x05.1_17783_c1", + "nmdc:wfmgas-13-56028x05.1_18468_c1", + 
"nmdc:wfmgas-13-56028x05.1_18553_c1", + "nmdc:wfmgas-13-56028x05.1_18858_c1", + "nmdc:wfmgas-13-56028x05.1_19302_c1", + "nmdc:wfmgas-13-56028x05.1_19824_c1", + "nmdc:wfmgas-13-56028x05.1_20316_c1", + "nmdc:wfmgas-13-56028x05.1_20787_c1", + "nmdc:wfmgas-13-56028x05.1_21029_c1", + "nmdc:wfmgas-13-56028x05.1_21435_c1", + "nmdc:wfmgas-13-56028x05.1_21475_c1", + "nmdc:wfmgas-13-56028x05.1_21484_c1", + "nmdc:wfmgas-13-56028x05.1_21518_c1", + "nmdc:wfmgas-13-56028x05.1_21685_c1", + "nmdc:wfmgas-13-56028x05.1_21809_c1", + "nmdc:wfmgas-13-56028x05.1_21924_c1", + "nmdc:wfmgas-13-56028x05.1_21958_c1", + "nmdc:wfmgas-13-56028x05.1_22186_c1", + "nmdc:wfmgas-13-56028x05.1_22271_c1", + "nmdc:wfmgas-13-56028x05.1_22516_c1", + "nmdc:wfmgas-13-56028x05.1_22514_c1", + "nmdc:wfmgas-13-56028x05.1_22777_c1", + "nmdc:wfmgas-13-56028x05.1_23003_c1", + "nmdc:wfmgas-13-56028x05.1_23115_c1", + "nmdc:wfmgas-13-56028x05.1_23204_c1", + "nmdc:wfmgas-13-56028x05.1_23239_c1", + "nmdc:wfmgas-13-56028x05.1_23352_c1", + "nmdc:wfmgas-13-56028x05.1_23445_c1", + "nmdc:wfmgas-13-56028x05.1_23505_c1", + "nmdc:wfmgas-13-56028x05.1_23571_c1", + "nmdc:wfmgas-13-56028x05.1_24047_c1", + "nmdc:wfmgas-13-56028x05.1_24749_c1", + "nmdc:wfmgas-13-56028x05.1_24981_c1", + "nmdc:wfmgas-13-56028x05.1_25059_c1", + "nmdc:wfmgas-13-56028x05.1_25526_c1", + "nmdc:wfmgas-13-56028x05.1_26162_c1", + "nmdc:wfmgas-13-56028x05.1_26376_c1", + "nmdc:wfmgas-13-56028x05.1_26773_c1", + "nmdc:wfmgas-13-56028x05.1_26816_c1", + "nmdc:wfmgas-13-56028x05.1_26891_c1", + "nmdc:wfmgas-13-56028x05.1_27179_c1", + "nmdc:wfmgas-13-56028x05.1_27272_c1", + "nmdc:wfmgas-13-56028x05.1_27358_c1", + "nmdc:wfmgas-13-56028x05.1_27411_c1", + "nmdc:wfmgas-13-56028x05.1_27550_c1", + "nmdc:wfmgas-13-56028x05.1_28892_c1", + "nmdc:wfmgas-13-56028x05.1_29003_c1", + "nmdc:wfmgas-13-56028x05.1_29238_c1", + "nmdc:wfmgas-13-56028x05.1_29324_c1", + "nmdc:wfmgas-13-56028x05.1_29771_c1", + "nmdc:wfmgas-13-56028x05.1_29878_c1", + "nmdc:wfmgas-13-56028x05.1_30248_c1", + 
"nmdc:wfmgas-13-56028x05.1_30476_c1", + "nmdc:wfmgas-13-56028x05.1_30587_c1", + "nmdc:wfmgas-13-56028x05.1_31160_c1", + "nmdc:wfmgas-13-56028x05.1_31834_c1", + "nmdc:wfmgas-13-56028x05.1_31922_c1", + "nmdc:wfmgas-13-56028x05.1_31971_c1", + "nmdc:wfmgas-13-56028x05.1_32244_c1", + "nmdc:wfmgas-13-56028x05.1_32605_c1", + "nmdc:wfmgas-13-56028x05.1_32623_c1", + "nmdc:wfmgas-13-56028x05.1_32832_c1", + "nmdc:wfmgas-13-56028x05.1_33068_c1", + "nmdc:wfmgas-13-56028x05.1_33334_c1", + "nmdc:wfmgas-13-56028x05.1_33438_c1", + "nmdc:wfmgas-13-56028x05.1_33855_c1", + "nmdc:wfmgas-13-56028x05.1_34035_c1", + "nmdc:wfmgas-13-56028x05.1_34120_c1", + "nmdc:wfmgas-13-56028x05.1_34140_c1", + "nmdc:wfmgas-13-56028x05.1_34133_c1", + "nmdc:wfmgas-13-56028x05.1_34177_c1", + "nmdc:wfmgas-13-56028x05.1_34481_c1", + "nmdc:wfmgas-13-56028x05.1_34728_c1", + "nmdc:wfmgas-13-56028x05.1_34843_c1", + "nmdc:wfmgas-13-56028x05.1_35665_c1", + "nmdc:wfmgas-13-56028x05.1_35772_c1", + "nmdc:wfmgas-13-56028x05.1_35995_c1" + ] + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/mags_job_metadata.json b/tests/fixtures/mags_job_metadata.json new file mode 100644 index 00000000..552aeac8 --- /dev/null +++ b/tests/fixtures/mags_job_metadata.json @@ -0,0 +1,1010 @@ +{ + "workflowName": "nmdc_mags", + "workflowProcessingEvents": [ + { + "cromwellId": "cromid-083a56f", + "description": "PickedUp", + "timestamp": "2024-07-01T16:54:56.053Z", + "cromwellVersion": "77" + }, + { + "cromwellId": "cromid-083a56f", + "description": "Finished", + "timestamp": "2024-07-01T19:55:38.766Z", + "cromwellVersion": "77" + } + ], + "actualWorkflowLanguageVersion": "1.0", + "submittedFiles": { + "workflow": "version 1.0\nworkflow nmdc_mags {\n input {\n String proj\n String contig_file\n String sam_file\n String gff_file\n String proteins_file\n String cog_file\n String ec_file\n String ko_file\n String pfam_file\n String tigrfam_file\n String cath_funfam_file\n String smart_file\n String supfam_file\n String 
product_names_file\n String gene_phylogeny_file\n String lineage_file\n File? map_file\n String? scratch_dir\n Int cpu=32\n Int threads=64\n Int pthreads=1\n String gtdbtk_db=\"/refdata/GTDBTK_DB/gtdbtk_release207_v2\"\n String checkm_db=\"/refdata/checkM_DB/checkm_data_2015_01_16\"\n String eukcc2_db=\"/refdata/EUKCC2_DB/eukcc2_db_ver_1.2\"\n String package_container = \"microbiomedata/nmdc_mbin_vis:0.2.0\"\n String container = \"microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349\"\n }\n call stage {\n input:\n container=container,\n contig_file=contig_file,\n sam_file=sam_file,\n gff_file=gff_file,\n proteins_file=proteins_file,\n cog_file=cog_file,\n ec_file=ec_file,\n ko_file=ko_file,\n pfam_file=pfam_file,\n tigrfam_file=tigrfam_file,\n cath_funfam_file=cath_funfam_file,\n smart_file=smart_file,\n supfam_file=supfam_file,\n product_names_file=product_names_file,\n gene_phylogeny_file=gene_phylogeny_file,\n lineage_file=lineage_file\n }\n\n call mbin_nmdc {\n input: \n name=proj,\n fna = stage.contig,\n aln = stage.sam,\n gff = stage.gff,\n lineage=stage.lineage_tsv,\n threads = threads,\n pthreads = pthreads,\n gtdbtk_env = gtdbtk_db,\n checkm_env = checkm_db,\n eukcc2_env = eukcc2_db,\n map_file = map_file,\n mbin_container = container\n }\n call package {\n input: proj = proj,\n bins=flatten([mbin_nmdc.hqmq_bin_fasta_files,mbin_nmdc.lq_bin_fasta_files]),\n json_stats=mbin_nmdc.stats_json,\n gff_file=stage.gff,\n proteins_file=stage.proteins,\n cog_file=stage.cog,\n ec_file=stage.ec,\n ko_file=stage.ko,\n pfam_file=stage.pfam,\n tigrfam_file=stage.tigrfam,\n cath_funfam_file=stage.cath_funfam,\n smart_file=stage.smart,\n supfam_file=stage.supfam,\n product_names_file=stage.product_names,\n container=package_container\n }\n\n call finish_mags {\n input:\n container=\"microbiomedata/workflowmeta:1.1.1\",\n contigs=stage.contig,\n anno_gff=stage.gff,\n sorted_bam=stage.sam,\n proj=proj,\n start=stage.start,\n checkm 
= mbin_nmdc.checkm,\n bacsum= mbin_nmdc.bacsum,\n arcsum = mbin_nmdc.arcsum,\n short = mbin_nmdc.short,\n low = mbin_nmdc.low,\n unbinned = mbin_nmdc.unbinned,\n checkm = mbin_nmdc.checkm,\n mbin_sdb = mbin_nmdc.mbin_sdb,\n mbin_version = mbin_nmdc.mbin_version,\n stats_json = mbin_nmdc.stats_json,\n stats_tsv = mbin_nmdc.stats_tsv,\n hqmq_bin_fasta_files = mbin_nmdc.hqmq_bin_fasta_files,\n bin_fasta_files = mbin_nmdc.lq_bin_fasta_files,\n hqmq_bin_tarfiles = package.hqmq_bin_tarfiles,\n lq_bin_tarfiles = package.lq_bin_tarfiles,\n barplot = package.barplot,\n heatmap = package.heatmap,\n kronaplot = package.kronaplot,\n eukcc_file=mbin_nmdc.eukcc_csv,\n ko_matrix = package.ko_matrix\n }\n\n output {\n File final_hqmq_bins_zip = finish_mags.final_hqmq_bins_zip\n File final_lq_bins_zip = finish_mags.final_lq_bins_zip\n File final_gtdbtk_bac_summary = finish_mags.final_gtdbtk_bac_summary\n File final_gtdbtk_ar_summary = finish_mags.final_gtdbtk_ar_summary\n File short = finish_mags.final_short\n File low = finish_mags.final_lowDepth_fa\n File final_unbinned_fa = finish_mags.final_unbinned_fa\n File final_checkm = finish_mags.final_checkm\n File mags_version = finish_mags.final_version\n File final_stats_json = finish_mags.final_stats_json\n File barplot = finish_mags.final_barplot\n File heatmap = finish_mags.final_heatmap\n File kronaplot = finish_mags.final_kronaplot\n }\n\n\n}\n\ntask mbin_nmdc {\n input{\n File fna\n File aln\n File gff\n File lineage\n String name\n File? map_file\n Int? threads\n Int? pthreads\n String gtdbtk_env\n String checkm_env\n\t String? 
eukcc2_env\n String mbin_container\n }\n\n command<<<\n set -euo pipefail\n export GTDBTK_DATA_PATH=~{gtdbtk_env}\n export CHECKM_DATA_PATH=~{checkm_env}\n mbin.py ~{\"--threads \" + threads} ~{\"--pthreads \" + pthreads} ~{\"--map \" + map_file} ~{\"--eukccdb \" + eukcc2_env} --fna ~{fna} --gff ~{gff} --aln ~{aln} --lintsv ~{lineage}\n mbin_stats.py $PWD\n mbin_versions.py > mbin_nmdc_versions.log\n touch MAGs_stats.tsv\n \n if [ -f gtdbtk-output/gtdbtk.bac120.summary.tsv ]; then\n echo \"bacterial summary exists.\"\n else\n mkdir -p gtdbtk-output\n echo \"No Bacterial Results for ~{name}\" > gtdbtk-output/gtdbtk.bac120.summary.tsv\n fi\n\n if [ -f gtdbtk-output/gtdbtk.ar122.summary.tsv ]; then\n echo \"archaeal summary exists.\"\n else\n mkdir -p gtdbtk-output\n echo \"No Archaeal Results for ~{name}\" > gtdbtk-output/gtdbtk.ar122.summary.tsv\n fi\n\n if [ -f checkm-qa.out ]; then\n echo \"checkm summary exists.\"\n else\n mkdir -p gtdbtk-output\n echo \"No Checkm Results for ~{name}\" > checkm-qa.out\n fi\n\n if [ -f mbin.sdb ]; then\n echo \"mbin.sdb exists.\"\n else\n mkdir -p gtdbtk-output\n echo \"Mbin Sdb Could not be created for ~{name}\" > mbin.sdb\n fi\n\n if [ -f eukcc_output/eukcc.csv.final ]; then\n echo \"eukcc.csv.final exists.\"\n else\n mkdir -p eukcc_output\n echo \"No EUKCC2 result for ~{name}\" > eukcc_output/eukcc.csv.final\n fi\n >>>\n\n runtime{\n docker : mbin_container\n memory : \"120 G\"\n\t time : \"2:00:00\"\n cpu : threads\n }\n\n output{\n File short = \"bins.tooShort.fa\"\n File low = \"bins.lowDepth.fa\"\n File unbinned = \"bins.unbinned.fa\"\n File checkm = \"checkm-qa.out\"\n File stats_json = \"MAGs_stats.json\"\n File stats_tsv = \"MAGs_stats.tsv\"\n File mbin_sdb = \"mbin.sdb\"\n File mbin_version = \"mbin_nmdc_versions.log\"\n File bacsum = \"gtdbtk-output/gtdbtk.bac120.summary.tsv\"\n File arcsum = \"gtdbtk-output/gtdbtk.ar122.summary.tsv\"\n\t File eukcc_csv = \"eukcc_output/eukcc.csv.final\"\n Array[File] 
hqmq_bin_fasta_files = glob(\"hqmq-metabat-bins/*fa\")\n Array[File] lq_bin_fasta_files = glob(\"filtered-metabat-bins/*fa\")\n } \n}\n\n\ntask stage {\n input{\n String container\n String contig_file\n String sam_file\n String gff_file\n String proteins_file\n String cog_file\n String ec_file\n String ko_file\n String pfam_file\n String tigrfam_file\n String cath_funfam_file\n String smart_file\n String supfam_file\n String product_names_file\n String gene_phylogeny_file\n String lineage_file\n String contigs_out=\"contigs.fasta\"\n String bam_out=\"pairedMapped_sorted.bam\"\n String gff_out=\"functional_annotation.gff\"\n String proteins_out=\"proteins.faa\"\n String cog_out=\"cog.gff\"\n String ec_out=\"ec.tsv\"\n String ko_out=\"ko.tsv\"\n String pfam_out=\"pfam.gff\"\n String tigrfam_out=\"tigrfam.gff\"\n String cath_funfam_out=\"cath_funfam.gff\"\n String smart_out=\"smart.gff\"\n String supfam_out=\"supfam.gff\"\n String products_out=\"products.tsv\"\n String gene_phylogeny_out=\"gene_phylogeny.tsv\"\n String lineage_out=\"lineage.tsv\"\n }\n command<<<\n\n set -e\n\n function stage() {\n in=$1\n out=$2\n if [ $( echo $in |egrep -c \"https*:\") -gt 0 ] ; then\n wget $in -O $out\n else\n ln $in $out || cp $in $out\n fi\n }\n\n stage ~{contig_file} ~{contigs_out}\n stage ~{sam_file} ~{bam_out}\n stage ~{gff_file} ~{gff_out}\n stage ~{proteins_file} ~{proteins_out}\n stage ~{cog_file} ~{cog_out}\n stage ~{ec_file} ~{ec_out}\n stage ~{ko_file} ~{ko_out}\n stage ~{pfam_file} ~{pfam_out}\n stage ~{tigrfam_file} ~{tigrfam_out}\n stage ~{cath_funfam_file} ~{cath_funfam_out}\n stage ~{smart_file} ~{smart_out}\n stage ~{supfam_file} ~{supfam_out}\n stage ~{product_names_file} ~{products_out}\n stage ~{gene_phylogeny_file} ~{gene_phylogeny_out}\n stage ~{lineage_file} ~{lineage_out}\n\n date --iso-8601=seconds > start.txt\n\n >>>\n\n output{\n File contig = \"contigs.fasta\"\n File sam = \"pairedMapped_sorted.bam\"\n File gff = \"functional_annotation.gff\"\n File 
proteins = \"proteins.faa\"\n File cog = \"cog.gff\"\n File ec = \"ec.tsv\"\n File ko = \"ko.tsv\"\n File pfam = \"pfam.gff\"\n File tigrfam = \"tigrfam.gff\"\n File cath_funfam = \"cath_funfam.gff\"\n File smart = \"smart.gff\"\n File supfam = \"supfam.gff\"\n File product_names = \"products.tsv\"\n File gene_phylogeny = \"gene_phylogeny.tsv\"\n File lineage_tsv = \"lineage.tsv\"\n String start = read_string(\"start.txt\")\n }\n runtime {\n memory: \"1 GiB\"\n cpu: 2\n maxRetries: 1\n docker: container\n }\n}\n\n\ntask package{\n input{\n String proj\n String prefix=sub(proj, \":\", \"_\")\n Array[File] bins\n File json_stats\n File gff_file\n File proteins_file\n File cog_file\n File ec_file\n File ko_file\n File pfam_file\n File tigrfam_file\n File cath_funfam_file\n File smart_file\n File supfam_file\n File product_names_file\n String container \n }\n command<<<\n set -e\n create_tarfiles.py ~{prefix} \\\n ~{json_stats} ~{gff_file} ~{proteins_file} ~{cog_file} \\\n ~{ec_file} ~{ko_file} ~{pfam_file} ~{tigrfam_file} \\\n ~{cath_funfam_file} ~{smart_file} ~{supfam_file} \\\n ~{product_names_file} \\\n ~{sep=\" \" bins}\n\n if [ -f ~{prefix}_heatmap.pdf ]; then\n echo \"KO analysis plot exists.\"\n else\n echo \"No KO analysis result for ~{proj}\" > ~{prefix}_heatmap.pdf\n echo \"No KO analysis result for ~{proj}\" > ~{prefix}_barplot.pdf\n echo \"No KO analysis result for ~{proj}\" > ~{prefix}_ko_krona.html\n echo \"No KO analysis result for ~{proj}\" > ~{prefix}_module_completeness.tab\n fi\n >>>\n output {\n Array[File] hqmq_bin_tarfiles = flatten([glob(\"*_HQ.tar.gz\"), glob(\"*_MQ.tar.gz\")])\n Array[File] lq_bin_tarfiles = glob(\"*_LQ.tar.gz\") \n File barplot = prefix + \"_barplot.pdf\"\n File heatmap = prefix + \"_heatmap.pdf\"\n File kronaplot = prefix + \"_ko_krona.html\"\n File ko_matrix = prefix + \"_module_completeness.tab\"\n }\n runtime {\n docker: container\n memory: \"1 GiB\"\n cpu: 1\n }\n}\n\ntask finish_mags {\n input{\n String container\n File 
contigs\n File anno_gff\n File sorted_bam\n File mbin_sdb\n File mbin_version\n String proj\n String prefix=sub(proj, \":\", \"_\")\n String start\n File bacsum\n File arcsum\n File? short\n File? low\n File? unbinned\n File? checkm\n Array[File] hqmq_bin_fasta_files\n Array[File] bin_fasta_files\n Array[File] hqmq_bin_tarfiles\n Array[File] lq_bin_tarfiles\n File stats_json\n File stats_tsv\n Int n_hqmq=length(hqmq_bin_tarfiles)\n Int n_lq=length(lq_bin_tarfiles)\n File barplot\n File heatmap\n File kronaplot\n File ko_matrix\n File eukcc_file\n }\n command<<<\n set -e\n end=`date --iso-8601=seconds`\n\n ln ~{low} ~{prefix}_bins.lowDepth.fa\n ln ~{short} ~{prefix}_bins.tooShort.fa\n ln ~{unbinned} ~{prefix}_bins.unbinned.fa\n ln ~{checkm} ~{prefix}_checkm_qa.out\n ln ~{mbin_version} ~{prefix}_bin.info\n ln ~{bacsum} ~{prefix}_gtdbtk.bac122.summary.tsv\n ln ~{arcsum} ~{prefix}_gtdbtk.ar122.summary.tsv\n ln ~{barplot} ~{prefix}_barplot.pdf\n ln ~{heatmap} ~{prefix}_heatmap.pdf\n ln ~{kronaplot} ~{prefix}_kronaplot.html\n ln ~{ko_matrix} ~{prefix}_ko_matrix.txt\n\n # cp all tarfiles, zip them under prefix, if empty touch no_mags.txt\n mkdir -p hqmq\n if [ ~{n_hqmq} -gt 0 ] ; then\n (cd hqmq && cp ~{sep=\" \" hqmq_bin_tarfiles} .)\n (cd hqmq && cp ~{mbin_sdb} .)\n (cd hqmq && zip -j ../~{prefix}_hqmq_bin.zip *tar.gz mbin.sdb ../*pdf ../*kronaplot.html ../*ko_matrix.txt)\n else\n (cd hqmq && touch no_hqmq_mags.txt)\n (cd hqmq && cp ~{mbin_sdb} .)\n (cd hqmq && zip ../~{prefix}_hqmq_bin.zip *.txt mbin.sdb)\n fi\n\n mkdir -p lq\n if [ ~{n_lq} -gt 0 ] ; then\n (cd lq && cp ~{sep=\" \" lq_bin_tarfiles} .)\n (cd lq && cp ~{mbin_sdb} .)\n (cd lq && zip -j ../~{prefix}_lq_bin.zip *tar.gz mbin.sdb ~{eukcc_file} ../*pdf ../*kronaplot.html ../*ko_matrix.txt)\n else\n (cd lq && touch no_lq_mags.txt)\n (cd lq && cp ~{mbin_sdb} .)\n (cd lq && zip ../~{prefix}_lq_bin.zip *.txt mbin.sdb ~{eukcc_file} )\n fi\n\n # Fix up attribute name\n cat ~{stats_json} | \\\n sed 's/: null/: 
\"null\"/g' | \\\n sed 's/lowDepth_/low_depth_/' > ~{prefix}_mags_stats.json\n\n >>>\n\n output {\n File final_checkm = \"~{prefix}_checkm_qa.out\"\n File final_hqmq_bins_zip = \"~{prefix}_hqmq_bin.zip\"\n File final_lq_bins_zip = \"~{prefix}_lq_bin.zip\"\n File final_stats_json = \"~{prefix}_mags_stats.json\"\n File final_gtdbtk_bac_summary = \"~{prefix}_gtdbtk.bac122.summary.tsv\"\n File final_gtdbtk_ar_summary = \"~{prefix}_gtdbtk.ar122.summary.tsv\"\n File final_lowDepth_fa = \"~{prefix}_bins.lowDepth.fa\"\n File final_unbinned_fa = \"~{prefix}_bins.unbinned.fa\"\n File final_short = \"~{prefix}_bins.tooShort.fa\"\n File final_version = \"~{prefix}_bin.info\"\n File final_kronaplot = \"~{prefix}_kronaplot.html\"\n File final_heatmap = \"~{prefix}_heatmap.pdf\"\n File final_barplot = \"~{prefix}_barplot.pdf\"\n }\n\n runtime {\n memory: \"10 GiB\"\n cpu: 4\n maxRetries: 1\n docker: container\n }\n}\n", + "root": "", + "options": "{\n\n}", + "inputs": "{\"nmdc_mags.cath_funfam_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cath_funfam.gff\",\"nmdc_mags.cog_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cog.gff\",\"nmdc_mags.contig_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_contigs.fna\",\"nmdc_mags.ec_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ec.tsv\",\"nmdc_mags.gene_phylogeny_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_gene_phylogeny.tsv\",\"nmdc_mags.gff_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_functional_annotation.gff\",\"nmdc_mags.ko_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmd
c:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ko.tsv\",\"nmdc_mags.lineage_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_scaffold_lineage.tsv\",\"nmdc_mags.pfam_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_pfam.gff\",\"nmdc_mags.product_names_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_product_names.tsv\",\"nmdc_mags.proj\":\"nmdc:wfmag-12-fxwdrv82.1\",\"nmdc_mags.proteins_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_proteins.faa\",\"nmdc_mags.sam_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_pairedMapped_sorted.bam\",\"nmdc_mags.smart_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_smart.gff\",\"nmdc_mags.supfam_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_supfam.gff\",\"nmdc_mags.tigrfam_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_tigrfam.gff\"}", + "workflowUrl": "", + "labels": "{\"release\": \"v1.3.2\", \"wdl\": \"mbin_nmdc.wdl\", \"git_repo\": \"https://github.com/microbiomedata/metaMAGs\", \"pipeline_version\": \"v1.3.2\", \"pipeline\": \"mbin_nmdc.wdl\", \"activity_id\": \"nmdc:wfmag-12-fxwdrv82.1\", \"opid\": \"nmdc:sys0v1137690\", \"submitter\": \"nmdcda\"}" + }, + "calls": { + "nmdc_mags.stage": [ + { + "executionStatus": "Done", + "stdout": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/stdout", + "backendStatus": "Done", + "compressedDockerSize": 1221549662, + "commandLine": "set -e\n\n 
function stage() {\n in=$1\n out=$2\n if [ $( echo $in |egrep -c \"https*:\") -gt 0 ] ; then\n wget $in -O $out\n else\n ln $in $out || cp $in $out\n fi\n }\n\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_contigs.fna contigs.fasta\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_pairedMapped_sorted.bam pairedMapped_sorted.bam\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_functional_annotation.gff functional_annotation.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_proteins.faa proteins.faa\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cog.gff cog.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ec.tsv ec.tsv\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ko.tsv ko.tsv\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_pfam.gff pfam.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_tigrfam.gff tigrfam.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cath_funfam.gff cath_funfam.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_smart.gff smart.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_supfam.gff supfam.gff\n stage 
https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_product_names.tsv products.tsv\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_gene_phylogeny.tsv gene_phylogeny.tsv\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_scaffold_lineage.tsv lineage.tsv\n\ndate --iso-8601=seconds > start.txt", + "shardIndex": -1, + "outputs": { + "cog": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/cog.gff", + "ec": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/ec.tsv", + "gene_phylogeny": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/gene_phylogeny.tsv", + "tigrfam": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/tigrfam.gff", + "gff": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/functional_annotation.gff", + "cath_funfam": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/cath_funfam.gff", + "smart": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/smart.gff", + "sam": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/pairedMapped_sorted.bam", + "supfam": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/supfam.gff", + "proteins": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/proteins.faa", + "pfam": 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/pfam.gff", + "ko": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/ko.tsv", + "contig": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/contigs.fasta", + "start": "2024-07-01T17:24:07+00:00", + "lineage_tsv": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/lineage.tsv", + "product_names": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/products.tsv" + }, + "runtimeAttributes": { + "runtime_minutes": "120", + "priority": "0", + "disk": "0.244140625 GB", + "failOnStderr": "false", + "continueOnReturnCode": "0", + "docker": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "maxRetries": "1", + "cpu": "2", + "memory": "1 GB" + }, + "callCaching": { + "allowResultReuse": true, + "hit": false, + "result": "Cache Miss", + "hashes": { + "output count": "C74D97B01EAE257E44AA9D5BADE97BAF", + "runtime attribute": { + "docker": "F72A72C28765615372494FF01D657EF0", + "continueOnReturnCode": "CFCD208495D565EF66E7DFF9F98764DA", + "failOnStderr": "68934A3E9455FA72420237EB05902327" + }, + "output expression": { + "File supfam": "57E594B7FFCEEB39CF06D332A5562FAC", + "File gff": "1480321F9248DF00AB8D641DB3A80283", + "File smart": "618329130FDCC2FA17D21157AA4341B7", + "File contig": "B3B220254A872C9A27EEEA164CEA7180", + "File lineage_tsv": "AA53DB68E93BC4D66EA98BBFF733FA07", + "File tigrfam": "7C3B7598C68086C56536F1484BF6B4E7", + "File ko": "C0DF3CDC23F8B885E8F2D1C783D961EB", + "File gene_phylogeny": "77F6C9BA0C2A6969FEC44C172C557157", + "String start": "9180F439602EB455553A080D43DB2473", + "File cog": "57E7D9F6C6027CB5F88CF52197A0E09A", + "File sam": 
"297A5965C834F32050A732873F038C12", + "File proteins": "78FF964764B16900FF2D3F5A7B34FB7B", + "File product_names": "121113CFD93DA6D5AB12948E5AEC22B3", + "File pfam": "E1434D4AE774C068C3993C5DA573CCC1", + "File cath_funfam": "5367DC3284E76222A31899AF8AB88BE0", + "File ec": "9B2989B8720CFF2E291E152C83908B58" + }, + "input count": "C16A5320FA475530D9583C34FD356EF5", + "backend name": "24B80D5AA1F64928B14AC8407909E586", + "command template": "945A6BCF429CF7799750A5446DE41E8D", + "input": { + "String sam_file": "19E9B366BAA40BD1CEAD418682EF03D0", + "String smart_file": "FCD3094FC3D4A65194D8D779657644DD", + "String contig_file": "972176A3B31DB605CAC2CB281E1BC924", + "String tigrfam_out": "7C3B7598C68086C56536F1484BF6B4E7", + "String proteins_file": "91CB7DAF44756D25FB41C1120FF38528", + "String container": "D7A0D4DC020579C472C721E2466C221B", + "String gene_phylogeny_file": "27B9190B78398205DA4E738C38F57677", + "String ec_out": "9B2989B8720CFF2E291E152C83908B58", + "String cath_funfam_file": "00D75F5AECD0AC5F2102096DCF140057", + "String cog_out": "57E7D9F6C6027CB5F88CF52197A0E09A", + "String contigs_out": "B3B220254A872C9A27EEEA164CEA7180", + "String ec_file": "869EB30CAA5362F12D0CEDB87B865FC9", + "String pfam_file": "1AD2F0CA348A7494974578FD03A73263", + "String ko_file": "43B94099DB185A0E7A97650AF74218F3", + "String supfam_out": "57E594B7FFCEEB39CF06D332A5562FAC", + "String products_out": "121113CFD93DA6D5AB12948E5AEC22B3", + "String smart_out": "618329130FDCC2FA17D21157AA4341B7", + "String proteins_out": "78FF964764B16900FF2D3F5A7B34FB7B", + "String gff_file": "5BFC9D19D9A3AA6346CA57C0981D991C", + "String supfam_file": "854A0384656E64DA95C89B1688AF4B47", + "String tigrfam_file": "5E3FDBBD21DE9295237F3AF1F9B81EDE", + "String lineage_out": "AA53DB68E93BC4D66EA98BBFF733FA07", + "String ko_out": "C0DF3CDC23F8B885E8F2D1C783D961EB", + "String lineage_file": "B3823B980A369F2AFEE9097020EB450B", + "String gene_phylogeny_out": "77F6C9BA0C2A6969FEC44C172C557157", + "String 
cath_funfam_out": "5367DC3284E76222A31899AF8AB88BE0", + "String gff_out": "1480321F9248DF00AB8D641DB3A80283", + "String pfam_out": "E1434D4AE774C068C3993C5DA573CCC1", + "String product_names_file": "D7A68F21BCA5FE1295BB4B75EBC0F004", + "String bam_out": "297A5965C834F32050A732873F038C12", + "String cog_file": "99130451CB1A2AEA46688AA60AE0AA9E" + } + }, + "effectiveCallCachingMode": "ReadAndWriteCache" + }, + "inputs": { + "proteins_out": "proteins.faa", + "pfam_out": "pfam.gff", + "smart_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_smart.gff", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_scaffold_lineage.tsv", + "cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cog.gff", + "cog_out": "cog.gff", + "container": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_proteins.faa", + "gene_phylogeny_out": "gene_phylogeny.tsv", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_product_names.tsv", + "contigs_out": "contigs.fasta", + "tigrfam_out": "tigrfam.gff", + "cath_funfam_out": "cath_funfam.gff", + "cath_funfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cath_funfam.gff", + "supfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_supfam.gff", + "gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_gene_phylogeny.tsv", + "supfam_out": "supfam.gff", + 
"lineage_out": "lineage.tsv", + "ko_out": "ko.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ko.tsv", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ec.tsv", + "gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_functional_annotation.gff", + "smart_out": "smart.gff", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_pfam.gff", + "gff_out": "functional_annotation.gff", + "bam_out": "pairedMapped_sorted.bam", + "products_out": "products.tsv", + "ec_out": "ec.tsv", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_tigrfam.gff", + "sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_pairedMapped_sorted.bam", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_contigs.fna" + }, + "returnCode": 0, + "jobId": "157723", + "backend": "HtCondor", + "end": "2024-07-01T17:28:13.648Z", + "dockerImageUsed": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "stderr": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/stderr", + "callRoot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2024-07-01T17:28:12.696Z", + "description": "UpdatingJobStore", + "endTime": "2024-07-01T17:28:13.648Z" + }, + { + "startTime": "2024-07-01T16:55:06.338Z", + "description": "RunningJob", + "endTime": "2024-07-01T17:28:10.225Z" + }, + { + 
"startTime": "2024-07-01T16:55:06.331Z", + "description": "CallCacheReading", + "endTime": "2024-07-01T16:55:06.338Z" + }, + { + "startTime": "2024-07-01T16:54:58.165Z", + "description": "Pending", + "endTime": "2024-07-01T16:54:58.165Z" + }, + { + "startTime": "2024-07-01T16:54:58.165Z", + "description": "RequestingExecutionToken", + "endTime": "2024-07-01T16:55:05.794Z" + }, + { + "startTime": "2024-07-01T16:55:05.794Z", + "description": "PreparingJob", + "endTime": "2024-07-01T16:55:06.331Z" + }, + { + "startTime": "2024-07-01T17:28:10.225Z", + "description": "UpdatingCallCache", + "endTime": "2024-07-01T17:28:12.696Z" + }, + { + "startTime": "2024-07-01T16:55:05.794Z", + "description": "WaitingForValueStore", + "endTime": "2024-07-01T16:55:05.794Z" + } + ], + "start": "2024-07-01T16:54:58.165Z" + } + ], + "nmdc_mags.mbin_nmdc": [ + { + "executionStatus": "Done", + "stdout": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/stdout", + "backendStatus": "Done", + "compressedDockerSize": 1221549662, + "commandLine": "set -euo pipefail\nexport GTDBTK_DATA_PATH=/refdata/GTDBTK_DB/gtdbtk_release207_v2\nexport CHECKM_DATA_PATH=/refdata/checkM_DB/checkm_data_2015_01_16\nmbin.py --threads 64 --pthreads 1 --eukccdb /refdata/EUKCC2_DB/eukcc2_db_ver_1.2 --fna /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/inputs/-51889329/contigs.fasta --gff /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/inputs/-51889329/functional_annotation.gff --aln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/inputs/-51889329/pairedMapped_sorted.bam --lintsv /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/inputs/-51889329/lineage.tsv\nmbin_stats.py $PWD\nmbin_versions.py > mbin_nmdc_versions.log\ntouch MAGs_stats.tsv\n\nif [ -f gtdbtk-output/gtdbtk.bac120.summary.tsv ]; then\n echo \"bacterial 
summary exists.\"\nelse\n mkdir -p gtdbtk-output\n echo \"No Bacterial Results for nmdc:wfmag-12-fxwdrv82.1\" > gtdbtk-output/gtdbtk.bac120.summary.tsv\nfi\n\nif [ -f gtdbtk-output/gtdbtk.ar122.summary.tsv ]; then\n echo \"archaeal summary exists.\"\nelse\n mkdir -p gtdbtk-output\n echo \"No Archaeal Results for nmdc:wfmag-12-fxwdrv82.1\" > gtdbtk-output/gtdbtk.ar122.summary.tsv\nfi\n\nif [ -f checkm-qa.out ]; then\n echo \"checkm summary exists.\"\nelse\n mkdir -p gtdbtk-output\n echo \"No Checkm Results for nmdc:wfmag-12-fxwdrv82.1\" > checkm-qa.out\nfi\n\nif [ -f mbin.sdb ]; then\n echo \"mbin.sdb exists.\"\nelse\n mkdir -p gtdbtk-output\n echo \"Mbin Sdb Could not be created for nmdc:wfmag-12-fxwdrv82.1\" > mbin.sdb\nfi\n\nif [ -f eukcc_output/eukcc.csv.final ]; then\n echo \"eukcc.csv.final exists.\"\nelse\n mkdir -p eukcc_output\n echo \"No EUKCC2 result for nmdc:wfmag-12-fxwdrv82.1\" > eukcc_output/eukcc.csv.final\nfi", + "shardIndex": -1, + "outputs": { + "stats_json": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/MAGs_stats.json", + "eukcc_csv": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/eukcc_output/eukcc.csv.final", + "arcsum": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/gtdbtk-output/gtdbtk.ar122.summary.tsv", + "short": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.tooShort.fa", + "stats_tsv": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/MAGs_stats.tsv", + "hqmq_bin_fasta_files": [ + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.1.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.11.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.12.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.17.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.20.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.21.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.22.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.33.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.37.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.38.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.40.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.42.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.43.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.7.fa" + ], + "low": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.lowDepth.fa", + "lq_bin_fasta_files": [ + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.10.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.13.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.14.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.15.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.16.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.18.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.19.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.2.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.23.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.24.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.25.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.26.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.27.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.28.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.29.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.3.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.30.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.31.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.32.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.34.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.35.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.36.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.39.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.4.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.41.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.44.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.45.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.46.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.47.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.5.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.6.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.8.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.9.fa" + ], + "checkm": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/checkm-qa.out", + "unbinned": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.unbinned.fa", + "mbin_version": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/mbin_nmdc_versions.log", + "mbin_sdb": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/mbin.sdb", + "bacsum": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/gtdbtk-output/gtdbtk.bac120.summary.tsv" + }, + "runtimeAttributes": { + "runtime_minutes": "120", + "priority": "0", + "disk": "0.244140625 GB", + "failOnStderr": "false", + "continueOnReturnCode": "0", + "docker": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "maxRetries": "0", + "cpu": "64", + "memory": "120 GB" + }, + "callCaching": { + "allowResultReuse": true, + "hit": false, + "result": "Cache Miss", + "hashes": { + "output count": "C51CE410C124A10E0DB5E4B97FC2AF39", + "runtime attribute": { + "docker": "F72A72C28765615372494FF01D657EF0", + "continueOnReturnCode": "CFCD208495D565EF66E7DFF9F98764DA", + "failOnStderr": "68934A3E9455FA72420237EB05902327" + }, + "output expression": { + "File arcsum": 
"787FE996F609C8C519F3CAF8A1B6CC4C", + "File stats_tsv": "93F275502607D245963D78AA311B330F", + "Array(File) hqmq_bin_fasta_files": "9FEEFEF01742A6D55705986B498E72D6", + "File stats_json": "BB64C91BA00FBFA822525C0CA83F7551", + "File bacsum": "0716BEAF9194B529A691ABA8169DF4A7", + "File mbin_version": "30DBADB0745B9CFBE83733A77E7B9ED2", + "File unbinned": "C40A14993B2B418745AE85C7D38560A0", + "File mbin_sdb": "C1F64B80E92F88AAAFD0F32281C86583", + "File checkm": "8014C70611B1425FB83B75AC0A646927", + "File eukcc_csv": "34B03AA4F494AF7218F14090EE042A0E", + "Array(File) lq_bin_fasta_files": "9A9271C31FA80817D2D58CE66E57A99C", + "File low": "DD7E48A19D33A1E113823D26F8B1DA5D", + "File short": "7E351A09B6E8E88DCE39A4E9D6577D62" + }, + "input count": "6512BD43D9CAA6E02C990B0A82652DCA", + "backend name": "24B80D5AA1F64928B14AC8407909E586", + "command template": "4A7638A485C9681C06B748116359536B", + "input": { + "File gff": "ea2c8bf4db0775c024361a3681f5e365", + "String gtdbtk_env": "CB23C84675C6493CC5F510B167E1EC23", + "String checkm_env": "12F5EA073DE844C333782082E5FFDEB9", + "File aln": "61d7bc083c1417c3d390cdac26c24faf", + "String mbin_container": "D7A0D4DC020579C472C721E2466C221B", + "String name": "0B6C033A7D83F6E4B430E858C83BD9F7", + "Int pthreads": "C4CA4238A0B923820DCC509A6F75849B", + "File fna": "f89ee38065ee7324bdbd46c627faae33", + "String eukcc2_env": "7EF9D719D5795EC372BD096B3A0766AE", + "Int threads": "EA5D2F1C4608232E07D3AA3D998E5135", + "File lineage": "a0062f034e4b177ab3cd11d9ffc1470f" + } + }, + "effectiveCallCachingMode": "ReadAndWriteCache" + }, + "inputs": { + "map_file": null, + "name": "nmdc:wfmag-12-fxwdrv82.1", + "lineage": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/lineage.tsv", + "gff": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/functional_annotation.gff", + "fna": 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/contigs.fasta", + "aln": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/pairedMapped_sorted.bam", + "mbin_container": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "eukcc2_env": "/refdata/EUKCC2_DB/eukcc2_db_ver_1.2", + "gtdbtk_env": "/refdata/GTDBTK_DB/gtdbtk_release207_v2", + "threads": 64, + "pthreads": 1, + "checkm_env": "/refdata/checkM_DB/checkm_data_2015_01_16" + }, + "returnCode": 0, + "jobId": "157724", + "backend": "HtCondor", + "end": "2024-07-01T18:26:22.636Z", + "dockerImageUsed": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "stderr": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/stderr", + "callRoot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2024-07-01T17:28:15.324Z", + "description": "RequestingExecutionToken", + "endTime": "2024-07-01T17:28:15.794Z" + }, + { + "startTime": "2024-07-01T17:28:15.794Z", + "description": "WaitingForValueStore", + "endTime": "2024-07-01T17:28:15.794Z" + }, + { + "startTime": "2024-07-01T18:26:21.670Z", + "description": "UpdatingJobStore", + "endTime": "2024-07-01T18:26:22.636Z" + }, + { + "startTime": "2024-07-01T17:28:15.801Z", + "description": "CallCacheReading", + "endTime": "2024-07-01T17:28:15.806Z" + }, + { + "startTime": "2024-07-01T17:28:15.794Z", + "description": "PreparingJob", + "endTime": "2024-07-01T17:28:15.801Z" + }, + { + "startTime": "2024-07-01T17:28:15.806Z", + "description": "RunningJob", + "endTime": "2024-07-01T18:26:19.764Z" + }, + { + "startTime": "2024-07-01T18:26:19.764Z", + "description": "UpdatingCallCache", + "endTime": 
"2024-07-01T18:26:21.670Z" + }, + { + "startTime": "2024-07-01T17:28:15.324Z", + "description": "Pending", + "endTime": "2024-07-01T17:28:15.324Z" + } + ], + "start": "2024-07-01T17:28:15.324Z" + } + ], + "nmdc_mags.package": [ + { + "executionStatus": "Done", + "stdout": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/stdout", + "backendStatus": "Done", + "compressedDockerSize": 493656270, + "commandLine": " set -e\n create_tarfiles.py nmdc_wfmag-12-fxwdrv82.1 \\\n /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-1737589708/MAGs_stats.json /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/functional_annotation.gff /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/proteins.faa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/cog.gff \\\n /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/ec.tsv /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/ko.tsv /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/pfam.gff /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/tigrfam.gff \\\n /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/cath_funfam.gff /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/smart.gff /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/supfam.gff \\\n /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/products.tsv \\\n /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.1.fa 
/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.11.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.12.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.17.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.20.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.21.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.22.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.33.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.37.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.38.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.40.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.42.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.43.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.7.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.10.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.13.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.14.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.15.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.16.fa 
/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.18.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.19.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.2.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.23.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.24.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.25.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.26.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.27.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.28.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.29.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.3.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.30.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.31.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.32.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.34.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.35.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.36.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.39.fa 
/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.4.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.41.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.44.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.45.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.46.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.47.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.5.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.6.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.8.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.9.fa\n\nif [ -f nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf ]; then\n echo \"KO analysis plot exists.\"\nelse\n echo \"No KO analysis result for nmdc:wfmag-12-fxwdrv82.1\" > nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf\n echo \"No KO analysis result for nmdc:wfmag-12-fxwdrv82.1\" > nmdc_wfmag-12-fxwdrv82.1_barplot.pdf\n echo \"No KO analysis result for nmdc:wfmag-12-fxwdrv82.1\" > nmdc_wfmag-12-fxwdrv82.1_ko_krona.html\n echo \"No KO analysis result for nmdc:wfmag-12-fxwdrv82.1\" > nmdc_wfmag-12-fxwdrv82.1_module_completeness.tab\nfi", + "shardIndex": -1, + "outputs": { + "barplot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf", + "ko_matrix": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_module_completeness.tab", + 
"heatmap": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf", + "hqmq_bin_tarfiles": [], + "kronaplot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_ko_krona.html", + "lq_bin_tarfiles": [] + }, + "runtimeAttributes": { + "runtime_minutes": "120", + "priority": "0", + "disk": "0.244140625 GB", + "failOnStderr": "false", + "continueOnReturnCode": "0", + "docker": "microbiomedata/nmdc_mbin_vis:0.2.0", + "maxRetries": "0", + "cpu": "1", + "memory": "1 GB" + }, + "callCaching": { + "allowResultReuse": true, + "hit": false, + "result": "Cache Miss", + "hashes": { + "output count": "1679091C5A880FAF6FB5E6087EB1B2DC", + "runtime attribute": { + "docker": "15EF5F3946F3FB97DB9077BDFB2EC05E", + "continueOnReturnCode": "CFCD208495D565EF66E7DFF9F98764DA", + "failOnStderr": "68934A3E9455FA72420237EB05902327" + }, + "output expression": { + "File barplot": "29CD6D98A3A8C29CA8D89F6B93286D44", + "Array(File) hqmq_bin_tarfiles": "4886452572964CFDBDE8441A90C90F28", + "File ko_matrix": "763DF5AD0AE3C8678D01B116FF35A06F", + "File kronaplot": "2CB90AF81AD50DF61944B0002F5DDBC5", + "File heatmap": "63EB363AF392505E28D439A85E705E8E", + "Array(File) lq_bin_tarfiles": "0D92AD8038C4661C1A37555A1B914BC6" + }, + "input count": "44F683A84163B3523AFE57C2E008BC8C", + "backend name": "24B80D5AA1F64928B14AC8407909E586", + "command template": "7CA922014F224723BB6DFB7456A87D7B", + "input": { + "File gff_file": "ea2c8bf4db0775c024361a3681f5e365", + "File cath_funfam_file": "f6035ac8fcad366b2aaf510c47e31946", + "File cog_file": "b7cf519f94ae0dcf4af94df2e537f557", + "String container": "E4724EB95C045566807727E42591F619", + "File supfam_file": "bfed8ab1bd61358ec2fe1395b6fb5ea9", + "File pfam_file": "65f06b443535f999e1c1e8c905256011", + "File ec_file": "5c09a91c4e53978aeb9d4483b0cc11a4", + "File bins": [ + 
"4d9b99511546584bce7c7fab1fb3239a", + "a4eac78eb4c8b0ecf0e8756233771ace", + "35b44a186ca246254e2ed7e71863a4b7", + "07be3fec5a7a91e3a2cf645dc5dd8f35", + "e0aad9a4ab114fce04b3c8f237947177", + "7de3db5f8dfb1dd3939a57988df964c3", + "c3b17de2b3762611e07796c1d9f071db", + "7371ac1f2883778219ea5384aab65cf0", + "10ffa413343a45a3e2483ddde6714372", + "df5bc7b394e2ff05ad66b13e678af5ec", + "d86ad0e91c260e66ae0aae5215117766", + "f31f3cecbc75788f542bf8a282e809d4", + "b4a919b6d17530372401f4265f4280d5", + "fb687e086e66cfd6d1c9562227ff82eb", + "4c43133298d71002259f856720995efb", + "0bb311382c673962f0f7587eb614ea33", + "c6b739ac09ea09f5a5b243fc81ed075f", + "d8ed5ba779e0c17bfac43e46717b7df6", + "9c752f3ab53db1304bc115d7b112122f", + "9d52837869fcb54b8f35d2b07c536d16", + "6264770bfb30cd917cba490fe4496827", + "5caf431a8a98222902e18d6b619b94e3", + "ddab74b9de92c0366bd6ac3a90ec3ac5", + "b636e39db668cf7f1082b38de5565c84", + "eeef47252a10fc24c9ca367f38479a98", + "17899bef5c05f0f67c0688e9bfa7b84f", + "3cd99095e4e49337f1a0ba8e2e78b8d4", + "557f966b1240c5ad8e0e881db3b7c763", + "445e37d0f2ec02326de0a67215bc9b92", + "ee0bfc516fa184e588c21a943c2b75e0", + "8ceebf37f47bc4eb3d440cd4f97ad726", + "015dc9c1217aff8439ddf12df1c870f8", + "dd8c5036801f9c357bd8f79fce9309df", + "584f8a3b2eb9135794f45ed7d65ac3cd", + "224ec5bb64ec54b6a77eabf1d4a55512", + "ebe11fe4d6ec9d1dd79768a7b6df98b2", + "6e94dea2ac00c4ca0b0d80e6bd0e9a76", + "5fe34ad29b1856344ff826771cadd9a3", + "13ff348208368e367081f0cadc954eb9", + "d962ed10fe72bfbdf22f51d9bc46e5db", + "75d195250615dd560d442239956588fb", + "40710b79ded1cd6ea6b5b95eaff75cf4", + "72c6b89521277756677c699f911b75c9", + "6866bc52a0883b1b8449df01ce623421", + "5a2d2714f8a59e266553e53fff498490", + "eb0d7cca4fac2d7745534e671182ba16", + "d3023a46ff4c15255de05dfd50d9b18e" + ], + "File product_names_file": "c9319ff507db4478ac97150d2e693a6e", + "File proteins_file": "eddbe66a44455e361d64c9d566a92ee8", + "String __prefix": "D99564A36EECD8686D8B31D929BEB1AD", + "File ko_file": 
"65bc9fa8ea542c85107f2639e2bcf37d", + "File json_stats": "3c690549518ed805e22a74f41b9a5ce8", + "File tigrfam_file": "7a133d0f92c99046f6ba20348de7e819", + "String proj": "0B6C033A7D83F6E4B430E858C83BD9F7", + "File smart_file": "9f7c69d4f38aa1a8ba0db9755f467679" + } + }, + "effectiveCallCachingMode": "ReadAndWriteCache" + }, + "inputs": { + "proj": "nmdc:wfmag-12-fxwdrv82.1", + "smart_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/smart.gff", + "cog_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/cog.gff", + "prefix": null, + "container": "microbiomedata/nmdc_mbin_vis:0.2.0", + "proteins_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/proteins.faa", + "product_names_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/products.tsv", + "json_stats": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/MAGs_stats.json", + "__prefix": "nmdc_wfmag-12-fxwdrv82.1", + "cath_funfam_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/cath_funfam.gff", + "supfam_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/supfam.gff", + "ko_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/ko.tsv", + "ec_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/ec.tsv", + "gff_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/functional_annotation.gff", + "pfam_file": 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/pfam.gff", + "bins": [ + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.1.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.11.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.12.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.17.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.20.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.21.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.22.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.33.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.37.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.38.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.40.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.42.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.43.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.7.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.10.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.13.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.14.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.15.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.16.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.18.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.19.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.2.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.23.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.24.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.25.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.26.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.27.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.28.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.29.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.3.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.30.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.31.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.32.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.34.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.35.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.36.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.39.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.4.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.41.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.44.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.45.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.46.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.47.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.5.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.6.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.8.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.9.fa" + ], + "tigrfam_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/tigrfam.gff" + }, + "returnCode": 0, + "jobId": "157725", + "backend": "HtCondor", + "end": "2024-07-01T19:53:37.640Z", + "dockerImageUsed": "microbiomedata/nmdc_mbin_vis@sha256:ec431444ad8c090932ccb4ff75d50b46b72de5d237f9069cb9bfcf47db49c911", + "stderr": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/stderr", + "callRoot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2024-07-01T18:26:25.794Z", + "description": "WaitingForValueStore", + "endTime": "2024-07-01T18:26:25.794Z" + }, + { + "startTime": "2024-07-01T18:26:26.360Z", + "description": "RunningJob", + "endTime": "2024-07-01T19:53:35.339Z" + }, + { + "startTime": "2024-07-01T18:26:23.725Z", + "description": "RequestingExecutionToken", + "endTime": "2024-07-01T18:26:25.794Z" + }, + { + "startTime": "2024-07-01T18:26:23.725Z", + "description": "Pending", + "endTime": "2024-07-01T18:26:23.725Z" + }, + { + "startTime": "2024-07-01T18:26:25.794Z", + "description": "PreparingJob", + "endTime": "2024-07-01T18:26:26.354Z" + }, + { + "startTime": "2024-07-01T18:26:26.354Z", + "description": "CallCacheReading", + "endTime": "2024-07-01T18:26:26.360Z" + }, + { + "startTime": "2024-07-01T19:53:35.339Z", + "description": 
"UpdatingCallCache", + "endTime": "2024-07-01T19:53:36.698Z" + }, + { + "startTime": "2024-07-01T19:53:36.698Z", + "description": "UpdatingJobStore", + "endTime": "2024-07-01T19:53:37.638Z" + } + ], + "start": "2024-07-01T18:26:23.725Z" + } + ], + "nmdc_mags.finish_mags": [ + { + "executionStatus": "Done", + "stdout": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/stdout", + "backendStatus": "Done", + "compressedDockerSize": 516978455, + "commandLine": "set -e\nend=`date --iso-8601=seconds`\n\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/bins.lowDepth.fa nmdc_wfmag-12-fxwdrv82.1_bins.lowDepth.fa\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/bins.tooShort.fa nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/bins.unbinned.fa nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/checkm-qa.out nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/mbin_nmdc_versions.log nmdc_wfmag-12-fxwdrv82.1_bin.info\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/184702129/gtdbtk.bac120.summary.tsv nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/184702129/gtdbtk.ar122.summary.tsv nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/1438666903/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf nmdc_wfmag-12-fxwdrv82.1_barplot.pdf\nln 
/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/1438666903/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/1438666903/nmdc_wfmag-12-fxwdrv82.1_ko_krona.html nmdc_wfmag-12-fxwdrv82.1_kronaplot.html\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/1438666903/nmdc_wfmag-12-fxwdrv82.1_module_completeness.tab nmdc_wfmag-12-fxwdrv82.1_ko_matrix.txt\n\n# cp all tarfiles, zip them under prefix, if empty touch no_mags.txt\nmkdir -p hqmq\nif [ 0 -gt 0 ] ; then\n (cd hqmq && cp .)\n (cd hqmq && cp /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/mbin.sdb .)\n (cd hqmq && zip -j ../nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip *tar.gz mbin.sdb ../*pdf ../*kronaplot.html ../*ko_matrix.txt)\nelse\n (cd hqmq && touch no_hqmq_mags.txt)\n (cd hqmq && cp /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/mbin.sdb .)\n (cd hqmq && zip ../nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip *.txt mbin.sdb)\nfi\n\nmkdir -p lq\nif [ 0 -gt 0 ] ; then\n (cd lq && cp .)\n (cd lq && cp /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/mbin.sdb .)\n (cd lq && zip -j ../nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip *tar.gz mbin.sdb /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/997245024/eukcc.csv.final ../*pdf ../*kronaplot.html ../*ko_matrix.txt)\nelse\n (cd lq && touch no_lq_mags.txt)\n (cd lq && cp /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/mbin.sdb .)\n (cd lq && zip ../nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip *.txt mbin.sdb /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/997245024/eukcc.csv.final )\nfi\n\n# 
Fix up attribute name\ncat /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/MAGs_stats.json | \\\n sed 's/: null/: \"null\"/g' | \\\n sed 's/lowDepth_/low_depth_/' > nmdc_wfmag-12-fxwdrv82.1_mags_stats.json", + "shardIndex": -1, + "outputs": { + "final_kronaplot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_kronaplot.html", + "final_heatmap": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf", + "final_stats_json": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_mags_stats.json", + "final_barplot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf", + "final_checkm": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out", + "final_lq_bins_zip": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip", + "final_gtdbtk_bac_summary": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv", + "final_version": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_bin.info", + "final_lowDepth_fa": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_bins.lowDepth.fa", + "final_gtdbtk_ar_summary": 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv", + "final_unbinned_fa": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa", + "final_short": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa", + "final_hqmq_bins_zip": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip" + }, + "runtimeAttributes": { + "runtime_minutes": "120", + "priority": "0", + "disk": "0.244140625 GB", + "failOnStderr": "false", + "continueOnReturnCode": "0", + "docker": "microbiomedata/workflowmeta:1.1.1", + "maxRetries": "1", + "cpu": "4", + "memory": "10 GB" + }, + "callCaching": { + "allowResultReuse": true, + "hit": false, + "result": "Cache Miss", + "hashes": { + "output count": "C51CE410C124A10E0DB5E4B97FC2AF39", + "runtime attribute": { + "docker": "A49801FD73AE1E831A0EF44B1F79DCB9", + "continueOnReturnCode": "CFCD208495D565EF66E7DFF9F98764DA", + "failOnStderr": "68934A3E9455FA72420237EB05902327" + }, + "output expression": { + "File final_kronaplot": "DC9995C52FEB104FA3386522DF38050F", + "File final_stats_json": "8910A2E05E66C923F85F50956FE696C2", + "File final_version": "E6A065CF46B2302C846ACA239680C5DA", + "File final_gtdbtk_bac_summary": "6EB18E0253124483A545F409F88E019A", + "File final_unbinned_fa": "88D797DB5CDD28AC43432CF04BECFC3F", + "File final_heatmap": "937B0F31CD5E38CA7F65FED52A2A205C", + "File final_barplot": "D5C03677905831471247ECFB23D9F798", + "File final_short": "A5B0BBA3B45B7EE47F8C96AA1C385078", + "File final_checkm": "85FA588428B4771129EB9FF97861EFF4", + "File final_gtdbtk_ar_summary": "191BAEBF116B81592C8E1994BD37BDC9", + "File 
final_lowDepth_fa": "501656F8F2BA6A150B0F61CD92850B9C", + "File final_hqmq_bins_zip": "03D6BD361C83A352D1694F570065CF05", + "File final_lq_bins_zip": "C3F4E86A1F3C8E73E66CD606A1C06BFE" + }, + "input count": "E2C420D928D4BF8CE0FF2EC19B371514", + "backend name": "24B80D5AA1F64928B14AC8407909E586", + "command template": "BC3DB3241648D4FF97777DAD3AF022BF", + "input": { + "Int __n_lq": "CFCD208495D565EF66E7DFF9F98764DA", + "File arcsum": "8796101cb0baf1a8e9f6e383285eba69", + "File stats_tsv": "ab51bb3bc9e4fb4658426ba8ce55f0b5", + "File stats_json": "3c690549518ed805e22a74f41b9a5ce8", + "File bacsum": "5d5e2d60857ab3e0669514ee19cb3e54", + "File barplot": "cf063ee66778585dfc7243056b3f68e5", + "File bin_fasta_files": [ + "4c43133298d71002259f856720995efb", + "0bb311382c673962f0f7587eb614ea33", + "c6b739ac09ea09f5a5b243fc81ed075f", + "d8ed5ba779e0c17bfac43e46717b7df6", + "9c752f3ab53db1304bc115d7b112122f", + "9d52837869fcb54b8f35d2b07c536d16", + "6264770bfb30cd917cba490fe4496827", + "5caf431a8a98222902e18d6b619b94e3", + "ddab74b9de92c0366bd6ac3a90ec3ac5", + "b636e39db668cf7f1082b38de5565c84", + "eeef47252a10fc24c9ca367f38479a98", + "17899bef5c05f0f67c0688e9bfa7b84f", + "3cd99095e4e49337f1a0ba8e2e78b8d4", + "557f966b1240c5ad8e0e881db3b7c763", + "445e37d0f2ec02326de0a67215bc9b92", + "ee0bfc516fa184e588c21a943c2b75e0", + "8ceebf37f47bc4eb3d440cd4f97ad726", + "015dc9c1217aff8439ddf12df1c870f8", + "dd8c5036801f9c357bd8f79fce9309df", + "584f8a3b2eb9135794f45ed7d65ac3cd", + "224ec5bb64ec54b6a77eabf1d4a55512", + "ebe11fe4d6ec9d1dd79768a7b6df98b2", + "6e94dea2ac00c4ca0b0d80e6bd0e9a76", + "5fe34ad29b1856344ff826771cadd9a3", + "13ff348208368e367081f0cadc954eb9", + "d962ed10fe72bfbdf22f51d9bc46e5db", + "75d195250615dd560d442239956588fb", + "40710b79ded1cd6ea6b5b95eaff75cf4", + "72c6b89521277756677c699f911b75c9", + "6866bc52a0883b1b8449df01ce623421", + "5a2d2714f8a59e266553e53fff498490", + "eb0d7cca4fac2d7745534e671182ba16", + "d3023a46ff4c15255de05dfd50d9b18e" + ], + "File 
mbin_version": "464f8330e7c6487d06fcb6ca95f74337", + "File sorted_bam": "61d7bc083c1417c3d390cdac26c24faf", + "String container": "2B6029E738D4565E38624C1F8EFB1683", + "File unbinned": "aef4cc7d1355c65590eab99dd9115152", + "File mbin_sdb": "363df47ccc3f41da9231a864cc25065a", + "File checkm": "855941a010c3a8884f8ab080310c1be7", + "File ko_matrix": "cf063ee66778585dfc7243056b3f68e5", + "File hqmq_bin_fasta_files": [ + "4d9b99511546584bce7c7fab1fb3239a", + "a4eac78eb4c8b0ecf0e8756233771ace", + "35b44a186ca246254e2ed7e71863a4b7", + "07be3fec5a7a91e3a2cf645dc5dd8f35", + "e0aad9a4ab114fce04b3c8f237947177", + "7de3db5f8dfb1dd3939a57988df964c3", + "c3b17de2b3762611e07796c1d9f071db", + "7371ac1f2883778219ea5384aab65cf0", + "10ffa413343a45a3e2483ddde6714372", + "df5bc7b394e2ff05ad66b13e678af5ec", + "d86ad0e91c260e66ae0aae5215117766", + "f31f3cecbc75788f542bf8a282e809d4", + "b4a919b6d17530372401f4265f4280d5", + "fb687e086e66cfd6d1c9562227ff82eb" + ], + "File eukcc_file": "d97456254b77ef7870e6a4b238afd00e", + "File kronaplot": "cf063ee66778585dfc7243056b3f68e5", + "File heatmap": "cf063ee66778585dfc7243056b3f68e5", + "String __prefix": "D99564A36EECD8686D8B31D929BEB1AD", + "String start": "51A09424D260EDD03B8F5D99411B03D6", + "File low": "d41d8cd98f00b204e9800998ecf8427e", + "File short": "6eaf12e65d6a2c80b598ae4d568bf902", + "File contigs": "f89ee38065ee7324bdbd46c627faae33", + "File anno_gff": "ea2c8bf4db0775c024361a3681f5e365", + "String proj": "0B6C033A7D83F6E4B430E858C83BD9F7", + "Int __n_hqmq": "CFCD208495D565EF66E7DFF9F98764DA" + } + }, + "effectiveCallCachingMode": "ReadAndWriteCache" + }, + "inputs": { + "stats_json": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/MAGs_stats.json", + "proj": "nmdc:wfmag-12-fxwdrv82.1", + "arcsum": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/gtdbtk-output/gtdbtk.ar122.summary.tsv", + "barplot": 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf", + "prefix": null, + "short": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.tooShort.fa", + "container": "microbiomedata/workflowmeta:1.1.1", + "ko_matrix": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_module_completeness.tab", + "stats_tsv": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/MAGs_stats.tsv", + "heatmap": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf", + "hqmq_bin_fasta_files": [ + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.1.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.11.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.12.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.17.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.20.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.21.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.22.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.33.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.37.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.38.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.40.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.42.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.43.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.7.fa" + ], + "bin_fasta_files": [ + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.10.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.13.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.14.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.15.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.16.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.18.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.19.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.2.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.23.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.24.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.25.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.26.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.27.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.28.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.29.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.3.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.30.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.31.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.32.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.34.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.35.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.36.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.39.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.4.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.41.fa", + 
"/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.44.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.45.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.46.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.47.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.5.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.6.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.8.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.9.fa" + ], + "sorted_bam": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/pairedMapped_sorted.bam", + "n_hqmq": null, + "low": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.lowDepth.fa", + "__prefix": "nmdc_wfmag-12-fxwdrv82.1", + "contigs": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/contigs.fasta", + "n_lq": null, + "checkm": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/checkm-qa.out", + 
"unbinned": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.unbinned.fa", + "mbin_version": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/mbin_nmdc_versions.log", + "mbin_sdb": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/mbin.sdb", + "bacsum": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/gtdbtk-output/gtdbtk.bac120.summary.tsv", + "start": "2024-07-01T17:24:07+00:00", + "hqmq_bin_tarfiles": [], + "kronaplot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_ko_krona.html", + "lq_bin_tarfiles": [], + "__n_hqmq": 0, + "eukcc_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/eukcc_output/eukcc.csv.final", + "anno_gff": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/functional_annotation.gff", + "__n_lq": 0 + }, + "returnCode": 0, + "jobId": "157726", + "backend": "HtCondor", + "end": "2024-07-01T19:55:37.633Z", + "dockerImageUsed": "microbiomedata/workflowmeta@sha256:f0ca787887e43851bbda55bef4c7b68513ffd7940e1f24f586c1d6fe220624a8", + "stderr": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/stderr", + "callRoot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2024-07-01T19:53:45.804Z", + "description": "PreparingJob", + "endTime": "2024-07-01T19:53:46.644Z" + }, + { + "startTime": "2024-07-01T19:53:46.673Z", + "description": "RunningJob", + "endTime": "2024-07-01T19:55:33.863Z" + }, 
+ { + "startTime": "2024-07-01T19:55:36.669Z", + "description": "UpdatingJobStore", + "endTime": "2024-07-01T19:55:37.633Z" + }, + { + "startTime": "2024-07-01T19:53:39.422Z", + "description": "RequestingExecutionToken", + "endTime": "2024-07-01T19:53:45.803Z" + }, + { + "startTime": "2024-07-01T19:53:39.414Z", + "description": "Pending", + "endTime": "2024-07-01T19:53:39.422Z" + }, + { + "startTime": "2024-07-01T19:53:45.803Z", + "description": "WaitingForValueStore", + "endTime": "2024-07-01T19:53:45.804Z" + }, + { + "startTime": "2024-07-01T19:53:46.644Z", + "description": "CallCacheReading", + "endTime": "2024-07-01T19:53:46.673Z" + }, + { + "startTime": "2024-07-01T19:55:33.863Z", + "description": "UpdatingCallCache", + "endTime": "2024-07-01T19:55:36.669Z" + } + ], + "start": "2024-07-01T19:53:39.408Z" + } + ] + }, + "outputs": { + "nmdc_mags.kronaplot": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_kronaplot.html", + "nmdc_mags.short": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa", + "nmdc_mags.final_unbinned_fa": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa", + "nmdc_mags.heatmap": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf", + "nmdc_mags.final_gtdbtk_ar_summary": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv", + "nmdc_mags.final_hqmq_bins_zip": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip", + "nmdc_mags.final_checkm": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out", + "nmdc_mags.mags_version": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bin.info", + "nmdc_mags.final_lq_bins_zip": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip", + "nmdc_mags.barplot": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf", + "nmdc_mags.final_gtdbtk_bac_summary": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv", + 
"nmdc_mags.low": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.lowDepth.fa", + "nmdc_mags.final_stats_json": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_mags_stats.json" + }, + "workflowRoot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/cromwell-000-000-000-000", + "actualWorkflowLanguage": "WDL", + "id": "cromwell-000-000-000-000", + "inputs": { + "checkm_db": "/refdata/checkM_DB/checkm_data_2015_01_16", + "map_file": null, + "nmdc_mags.stage.products_out": "products.tsv", + "nmdc_mags.stage.cath_funfam_out": "cath_funfam.gff", + "eukcc2_db": "/refdata/EUKCC2_DB/eukcc2_db_ver_1.2", + "proj": "nmdc:wfmag-12-fxwdrv82.1", + "gtdbtk_db": "/refdata/GTDBTK_DB/gtdbtk_release207_v2", + "nmdc_mags.package.prefix": null, + "smart_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_smart.gff", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_scaffold_lineage.tsv", + "nmdc_mags.stage.proteins_out": "proteins.faa", + "cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cog.gff", + "nmdc_mags.finish_mags.n_hqmq": null, + "nmdc_mags.finish_mags.n_lq": null, + "container": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_proteins.faa", + "nmdc_mags.stage.gff_out": "functional_annotation.gff", + "nmdc_mags.stage.ko_out": "ko.tsv", + "nmdc_mags.stage.cog_out": "cog.gff", + "scratch_dir": null, + "nmdc_mags.stage.supfam_out": "supfam.gff", + "nmdc_mags.stage.contigs_out": "contigs.fasta", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_product_names.tsv", + 
"nmdc_mags.stage.gene_phylogeny_out": "gene_phylogeny.tsv", + "package_container": "microbiomedata/nmdc_mbin_vis:0.2.0", + "nmdc_mags.stage.pfam_out": "pfam.gff", + "nmdc_mags.stage.bam_out": "pairedMapped_sorted.bam", + "nmdc_mags.stage.smart_out": "smart.gff", + "cath_funfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cath_funfam.gff", + "supfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_supfam.gff", + "cpu": 32, + "nmdc_mags.stage.tigrfam_out": "tigrfam.gff", + "gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_gene_phylogeny.tsv", + "nmdc_mags.stage.ec_out": "ec.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ko.tsv", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ec.tsv", + "gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_functional_annotation.gff", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_pfam.gff", + "threads": 64, + "nmdc_mags.finish_mags.prefix": null, + "nmdc_mags.stage.lineage_out": "lineage.tsv", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_tigrfam.gff", + "pthreads": 1, + "sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_pairedMapped_sorted.bam", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_contigs.fna" + }, + "labels": { + "wdl": "mbin_nmdc.wdl", + 
"git_repo": "https://github.com/microbiomedata/metaMAGs", + "pipeline_version": "v1.3.2", + "cromwell-workflow-id": "cromwell-29628c3e-8850-4210-927a-1d4258fa35d1", + "pipeline": "mbin_nmdc.wdl", + "opid": "nmdc:sys0v1137690", + "activity_id": "nmdc:wfmag-12-fxwdrv82.1", + "submitter": "nmdcda", + "release": "v1.3.2" + }, + "submission": "2024-07-01T16:54:45.797Z", + "status": "Succeeded", + "end": "2024-07-01T19:55:38.764Z", + "start": "2024-07-01T16:54:56.053Z" +} \ No newline at end of file diff --git a/tests/fixtures/mags_job_record.json b/tests/fixtures/mags_job_record.json new file mode 100644 index 00000000..7bbea6b7 --- /dev/null +++ b/tests/fixtures/mags_job_record.json @@ -0,0 +1,226 @@ +{ + "workflow" : { + "id" : "MAGs: v1.0.6" + }, + "id" : "nmdc:ab9a3d70-2bfa-11ee-940d-4eb93f6e7850", + "created_at" : "2023-07-26T21:23:31.000+0000", + "config" : { + "git_repo" : "https://github.com/microbiomedata/metaMAGs", + "release" : "v1.0.6", + "wdl" : "mbin_nmdc.wdl", + "activity_id" : "nmdc:wfmag-11-05myyz45.1", + "activity_set" : "mags_activity_set", + "was_informed_by" : "nmdc:omprc-11-vpqmce67", + "trigger_activity" : "nmdc:wfmgan-11-wdx72h27.1", + "iteration" : 1, + "input_prefix" : "nmdc_mags", + "inputs" : { + "contig_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgas-11-90bn3y70.1/nmdc_wfmgas-11-90bn3y70.1_contigs.fna", + "gff_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_functional_annotation.gff", + "cath_funfam_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_cath_funfam.gff", + "supfam_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_supfam.gff", + "cog_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_cog.gff", + "proj_name" : 
"nmdc:wfmag-11-05myyz45.1", + "pfam_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_pfam.gff", + "product_names_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_product_names.tsv", + "tigrfam_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_tigrfam.gff", + "ec_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_ec.tsv", + "ko_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_ko.tsv", + "lineage_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_scaffold_lineage.tsv", + "sam_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgas-11-90bn3y70.1/nmdc_wfmgas-11-90bn3y70.1_pairedMapped_sorted.bam", + "smart_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_smart.gff", + "proteins_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_proteins.faa", + "gene_phylogeny_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_gene_phylogeny.tsv", + "proj" : "nmdc:wfmag-11-05myyz45.1" + }, + "input_data_objects" : [ + { + "id" : "nmdc:dobj-11-9mkb6w25", + "name" : "nmdc_wfmgas-11-90bn3y70.1_contigs.fna", + "description" : "Assembly contigs for nmdc:wfmgas-11-90bn3y70.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgas-11-90bn3y70.1/nmdc_wfmgas-11-90bn3y70.1_contigs.fna", + "md5_checksum" : "64ac183a6f9c497fa6ae43cc2aa1ca6e", + "file_size_bytes" : 26375887, + "data_object_type" : "Assembly Contigs" + 
}, + { + "id" : "nmdc:dobj-11-gt7grc22", + "name" : "nmdc_wfmgan-11-wdx72h27.1_functional_annotation.gff", + "description" : "Functional Annotation for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_functional_annotation.gff", + "md5_checksum" : "4a0d219fa5c4cbbcf9ea09bb8adaa95c", + "file_size_bytes" : 21676898, + "data_object_type" : "Functional Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-20kgjz21", + "name" : "nmdc_wfmgan-11-wdx72h27.1_cath_funfam.gff", + "description" : "CATH FunFams for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_cath_funfam.gff", + "md5_checksum" : "b741c2924517489a3dae08faec12a275", + "file_size_bytes" : 11283509, + "data_object_type" : "CATH FunFams (Functional Families) Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-rse8j628", + "name" : "nmdc_wfmgan-11-wdx72h27.1_supfam.gff", + "description" : "SUPERFam Annotations for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_supfam.gff", + "md5_checksum" : "fd6e6169b921ccbb60a29972ae048a19", + "file_size_bytes" : 15369673, + "data_object_type" : "SUPERFam Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-vkz6mc22", + "name" : "nmdc_wfmgan-11-wdx72h27.1_cog.gff", + "description" : "COGs for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_cog.gff", + "md5_checksum" : "91f8c708ac8a00aa1459b7f2e297499d", + "file_size_bytes" : 12788976, + "data_object_type" : "Clusters of Orthologous Groups (COG) Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-ywbazd98", + "name" : "nmdc_wfmgan-11-wdx72h27.1_pfam.gff", + "description" : "Pfam Annotation for nmdc:wfmgan-11-wdx72h27.1", + "url" : 
"https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_pfam.gff", + "md5_checksum" : "5c513e356fc7104b8b552b68e838bbad", + "file_size_bytes" : 9682652, + "data_object_type" : "Pfam Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-s0swen20", + "name" : "nmdc_wfmgan-11-wdx72h27.1_product_names.tsv", + "description" : "Product names for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_product_names.tsv", + "md5_checksum" : "2a81f01bb1eb5ed849cb00b0b45adcf7", + "file_size_bytes" : 6526155, + "data_object_type" : "Product Names" + }, + { + "id" : "nmdc:dobj-11-mxwbrg81", + "name" : "nmdc_wfmgan-11-wdx72h27.1_tigrfam.gff", + "description" : "TIGRFam for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_tigrfam.gff", + "md5_checksum" : "b709aa5a09615e9a8ac34c9e88d658e3", + "file_size_bytes" : 869578, + "data_object_type" : "TIGRFam Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-n5hs3k52", + "name" : "nmdc_wfmgan-11-wdx72h27.1_ec.tsv", + "description" : "EC Annotations for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_ec.tsv", + "md5_checksum" : "c36eb4d979461c3346308e9d9bc2252e", + "file_size_bytes" : 2104885, + "data_object_type" : "Annotation Enzyme Commission" + }, + { + "id" : "nmdc:dobj-11-bvge9w42", + "name" : "nmdc_wfmgan-11-wdx72h27.1_ko.tsv", + "description" : "KEGG Orthology for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_ko.tsv", + "md5_checksum" : "2a99a362b0b5d0559ae6d46636d82b27", + "file_size_bytes" : 3088516, + "data_object_type" : "Annotation KEGG Orthology" + }, + { + "id" : 
"nmdc:dobj-11-m19exh45", + "name" : "nmdc_wfmgan-11-wdx72h27.1_scaffold_lineage.tsv", + "description" : "Scaffold Lineage tsv for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_scaffold_lineage.tsv", + "md5_checksum" : "784b9fb9bc77c00107a51131362f04f9", + "file_size_bytes" : 9663889, + "data_object_type" : "Scaffold Lineage tsv" + }, + { + "id" : "nmdc:dobj-11-bg0j9849", + "name" : "nmdc_wfmgas-11-90bn3y70.1_pairedMapped_sorted.bam", + "description" : "Sorted Bam for nmdc:wfmgas-11-90bn3y70.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgas-11-90bn3y70.1/nmdc_wfmgas-11-90bn3y70.1_pairedMapped_sorted.bam", + "md5_checksum" : "630401367bd703262f52d95c4e5b22d1", + "file_size_bytes" : 1747712213, + "data_object_type" : "Assembly Coverage BAM" + }, + { + "id" : "nmdc:dobj-11-r40xwr84", + "name" : "nmdc_wfmgan-11-wdx72h27.1_smart.gff", + "description" : "SMART Annotations for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_smart.gff", + "md5_checksum" : "fa161d364f57eb6aa3af483a90d14cd5", + "file_size_bytes" : 2517672, + "data_object_type" : "SMART Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-5kk68p73", + "name" : "nmdc_wfmgan-11-wdx72h27.1_proteins.faa", + "description" : "FASTA Amino Acid File for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_proteins.faa", + "md5_checksum" : "331bce1648f53e5631753f96ee79c250", + "file_size_bytes" : 15067142, + "data_object_type" : "Annotation Amino Acid FASTA" + }, + { + "id" : "nmdc:dobj-11-jfgh0180", + "name" : "nmdc_wfmgan-11-wdx72h27.1_gene_phylogeny.tsv", + "description" : "Gene Phylogeny for nmdc:wfmgan-11-wdx72h27.1", + "url" : 
"https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_gene_phylogeny.tsv", + "md5_checksum" : "16f945aa958cd0c13847d44a44736333", + "file_size_bytes" : 12195613, + "data_object_type" : "Gene Phylogeny tsv" + } + ], + "activity" : { + "name" : "Metagenome Assembled Genomes Analysis Activity for {id}", + "type" : "nmdc:MagsAnalysisActivity" + }, + "outputs" : [ + { + "output" : "final_checkm", + "data_object_type" : "CheckM Statistics", + "description" : "CheckM for {id}", + "name" : "CheckM statistics report", + "suffix" : "_checkm_qa.out", + "id" : "nmdc:dobj-11-yrzfq471" + }, + { + "output" : "final_hqmq_bins_zip", + "data_object_type" : "Metagenome Bins", + "description" : "Metagenome Bins for {id}", + "name" : "Metagenome bin tarfiles archive", + "suffix" : "_hqmq_bin.zip", + "id" : "nmdc:dobj-11-dsbday74" + }, + { + "output" : "final_gtdbtk_bac_summary", + "data_object_type" : "GTDBTK Bacterial Summary", + "description" : "Bacterial Summary for {id}", + "name" : "GTDBTK bacterial summary", + "suffix" : "_gtdbtk.bac122.summary.tsv", + "id" : "nmdc:dobj-11-104ypv57" + }, + { + "output" : "final_gtdbtk_ar_summary", + "data_object_type" : "GTDBTK Archaeal Summary", + "description" : "Archaeal Summary for {id}", + "name" : "GTDBTK archaeal summary", + "suffix" : "_gtdbtk.ar122.summary.tsv", + "id" : "nmdc:dobj-11-t1v6w944" + }, + { + "output" : "mags_version", + "data_object_type" : "Metagenome Bins Info File", + "description" : "Metagenome Bins Info File for {id}", + "name" : "Metagenome Bins Info File", + "suffix" : "_bin.info", + "id" : "nmdc:dobj-11-0c397145" + } + ] + }, + "claims" : [ + { + "op_id" : "nmdc:sys0jm1cts41", + "site_id" : "NERSC" + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/mags_record.json b/tests/fixtures/mags_record.json new file mode 100644 index 00000000..12ab23d3 --- /dev/null +++ b/tests/fixtures/mags_record.json @@ -0,0 +1,36 @@ +{ + "id": 
"nmdc:wfmag-11-00jn7876.1", + "name": "Metagenome Assembled Genomes Analysis Activity for nmdc:wfmag-11-00jn7876.1", + "started_at_time": "2023-07-30T21:31:56.387227+00:00", + "ended_at_time": "2023-07-30T21:34:32.750008+00:00", + "was_informed_by": "nmdc:omprc-11-7yj0jg57", + "execution_resource": "NERSC-Perlmutter", + "git_url": "https://github.com/microbiomedata/metaMAGs", + "has_input": [ + "nmdc:dobj-11-yjp1xw52", + "nmdc:dobj-11-3av14y79", + "nmdc:dobj-11-wa5pnq42", + "nmdc:dobj-11-nexa9703", + "nmdc:dobj-11-j13n8739", + "nmdc:dobj-11-116fa706", + "nmdc:dobj-11-60d0na51", + "nmdc:dobj-11-2vbz7538", + "nmdc:dobj-11-1t48mn65", + "nmdc:dobj-11-1cvwk224", + "nmdc:dobj-11-cdna6f90", + "nmdc:dobj-11-4vb3ww76", + "nmdc:dobj-11-xv4qd072", + "nmdc:dobj-11-m7p3sb10", + "nmdc:dobj-11-j0t1rv33" + ], + "has_output": [ + "nmdc:dobj-11-k5ad4209", + "nmdc:dobj-11-bw8nqt30", + "nmdc:dobj-11-199t2777", + "nmdc:dobj-11-2qfh8476", + "nmdc:dobj-11-fcsvq172" + ], + "type": "nmdc:MagsAnalysis", + "version": "v1.0.6", + "mags_list": [] + } \ No newline at end of file diff --git a/tests/fixtures/mags_workflow_state.json b/tests/fixtures/mags_workflow_state.json new file mode 100644 index 00000000..7cf173f0 --- /dev/null +++ b/tests/fixtures/mags_workflow_state.json @@ -0,0 +1,216 @@ +{ + "type": "MAGs: v1.3.10", + "cromwell_jobid": "9492a397-eb30-472b-9d3b-b44b676f4652", + "nmdc_jobid": "nmdc:66cf64b6-7462-11ef-8b84-deaa01ab0f49", + "conf": { + "git_repo": "https://github.com/microbiomedata/metaMAGs", + "release": "v1.3.10", + "wdl": "mbin_nmdc.wdl", + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "activity_set": "mags_activity_set", + "was_informed_by": "nmdc:omprc-11-9cdxha98", + "trigger_activity": "nmdc:wfmgan-11-jv8kx789.1", + "iteration": 1, + "input_prefix": "nmdc_mags", + "inputs": { + "proj": "nmdc:wfmag-11-g7msr323.1", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "sam_file": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "crispr_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "map_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-1x850k20", + "name": 
"nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "description": "Assembly contigs (remapped) for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "md5_checksum": "6debed079383eeca2045ce23b0576607", + "file_size_bytes": 2084209623, + "data_object_type": "Assembly Contigs" + }, + { + "id": "nmdc:dobj-11-fkj2kt47", + "name": "nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-9cdxha98", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "md5_checksum": "88ec004bd037a3820060427098798666", + "file_size_bytes": 15704979428, + "data_object_type": "Assembly Coverage BAM" + }, + { + "id": "nmdc:dobj-11-f9rnav80", + "name": "nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "description": "Functional Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "md5_checksum": "349cae9b4fe62bb910f08a183e57b475", + "file_size_bytes": 1320869282, + "data_object_type": "Functional Annotation GFF" + }, + { + "id": "nmdc:dobj-11-btqzf393", + "name": "nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "description": "FASTA Amino Acid File for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "md5_checksum": "292eae73923605dae2ef9f5d582e4603", + "file_size_bytes": 1075716574, + "data_object_type": "Annotation Amino Acid FASTA" + }, + { + "id": "nmdc:dobj-11-hdty3m42", + "name": "nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "description": "COGs for nmdc:wfmgan-11-jv8kx789.1", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "md5_checksum": "c4d1121c1ceb1229afb7190d23553003", + "file_size_bytes": 712459544, + "data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF" + }, + { + "id": "nmdc:dobj-11-0gk70187", + "name": "nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "description": "EC Annotations for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "md5_checksum": "84cf22f39532e1bd001bea8425735a82", + "file_size_bytes": 116429630, + "data_object_type": "Annotation Enzyme Commission" + }, + { + "id": "nmdc:dobj-11-3mtmhf26", + "name": "nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "description": "KEGG Orthology for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "md5_checksum": "17d699df17c97fc28796a198cf40a328", + "file_size_bytes": 169182276, + "data_object_type": "Annotation KEGG Orthology" + }, + { + "id": "nmdc:dobj-11-7kfhf682", + "name": "nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "description": "Pfam Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "md5_checksum": "23c33758dc138e1af0f39fa1f3ca07db", + "file_size_bytes": 602929841, + "data_object_type": "Pfam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-9hjg8y84", + "name": "nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "description": "TIGRFam for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "md5_checksum": "bbfded219e0b359602725c9efb4f0c54", + "file_size_bytes": 61788991, + "data_object_type": "TIGRFam Annotation GFF" + }, + { + "id": 
"nmdc:dobj-11-2x0wy902", + "name": "nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "description": "Crispr Terms for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "md5_checksum": "9d2255a63e39552328c4da20ccf2bb3f", + "file_size_bytes": 142989, + "data_object_type": "Crispr Terms" + }, + { + "id": "nmdc:dobj-11-r0bx4g71", + "name": "nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "description": "Product names for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "md5_checksum": "6f1325b2f8dee9b2a75598fb9645c43d", + "file_size_bytes": 401118634, + "data_object_type": "Product Names" + }, + { + "id": "nmdc:dobj-11-7mj15p44", + "name": "nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "description": "Gene Phylogeny for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "md5_checksum": "037aee803f1b81ac5ac1bccb9a18527d", + "file_size_bytes": 748420652, + "data_object_type": "Gene Phylogeny tsv" + }, + { + "id": "nmdc:dobj-11-r2zqpy26", + "name": "nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "description": "Scaffold Lineage tsv for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "md5_checksum": "efdce9771cdda8bd8548e44ef6d1d3a3", + "file_size_bytes": 503898615, + "data_object_type": "Scaffold Lineage tsv" + }, + { + "id": "nmdc:dobj-11-4k2bt072", + "name": "nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "description": "Contig mappings file for nmdc:wfmgan-11-jv8kx789.1", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "md5_checksum": "1056a6ef48ce9124de0828ee85246e65", + "file_size_bytes": 250129248, + "data_object_type": "Contig Mapping File" + } + ], + "activity": { + "name": "Metagenome Assembled Genomes Analysis Activity for {id}", + "type": "nmdc:MagsAnalysisActivity", + "binned_contig_num": "{outputs.final_stats_json.binned_contig_num}", + "input_contig_num": "{outputs.final_stats_json.input_contig_num}", + "low_depth_contig_num": "{outputs.final_stats_json.low_depth_contig_num}", + "mags_list": "{outputs.final_stats_json.mags_list}", + "too_short_contig_num": "{outputs.final_stats_json.too_short_contig_num}", + "unbinned_contig_num": "{outputs.final_stats_json.unbinned_contig_num}" + }, + "outputs": [ + { + "output": "final_checkm", + "data_object_type": "CheckM Statistics", + "description": "CheckM for {id}", + "name": "CheckM statistics report", + "id": "nmdc:dobj-11-xvjz5h55" + }, + { + "output": "final_hqmq_bins_zip", + "data_object_type": "Metagenome Bins", + "description": "Metagenome Bins for {id}", + "name": "Metagenome bin tarfiles archive", + "id": "nmdc:dobj-11-85q1v678" + }, + { + "output": "final_gtdbtk_bac_summary", + "data_object_type": "GTDBTK Bacterial Summary", + "description": "Bacterial Summary for {id}", + "name": "GTDBTK bacterial summary", + "id": "nmdc:dobj-11-j5p58211" + }, + { + "output": "final_gtdbtk_ar_summary", + "data_object_type": "GTDBTK Archaeal Summary", + "description": "Archaeal Summary for {id}", + "name": "GTDBTK archaeal summary", + "suffix": "_gtdbtk.ar122.summary.tsv", + "id": "nmdc:dobj-11-ec2fqk35" + }, + { + "output": "mags_version", + "data_object_type": "Metagenome Bins Info File", + "description": "Metagenome Bins Info File for {id}", + "name": "Metagenome Bins Info File", + "id": "nmdc:dobj-11-kg68h909" + } + ] + }, + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "last_status": 
"Succeeded", + "done": true, + "failed_count": 0, + "start": "2024-09-16T19:33:32.562412+00:00", + "end": "2024-09-16T21:52:12.873101+00:00", + "opid": "nmdc:sys0m369xp60" + } diff --git a/tests/fixtures/metagenome_assembly_set.json b/tests/fixtures/metagenome_assembly_set.json deleted file mode 100644 index 32d0962f..00000000 --- a/tests/fixtures/metagenome_assembly_set.json +++ /dev/null @@ -1,51 +0,0 @@ -[ - { - "has_input": [ - "nmdc:dobj-11-filteredreads" - ], - "part_of": [ - "nmdc:omprc-11-metag1" - ], - "scaf_N50": 302542, - "ctg_logsum": 5909271, - "ctg_N90": 1315235, - "gc_avg": 0.60257, - "scaf_N90": 1312362, - "scaf_logsum": 5934400, - "scaf_pct_gt50K": 1.7355337, - "gap_pct": 0.00334, - "git_url": "https://github.com/microbiomedata/metaAssembly", - "version": "v1.0.3", - "has_output": [ - "nmdc:dobj-11-contigs", - "nmdc:dobj-11-coverstats", - "nmdc:dobj-11-coverbam", - "nmdc:dobj-11-assemblyagp", - "nmdc:dobj-11-scaffolds" - ], - "asm_score": 11.297, - "was_informed_by": "nmdc:omprc-11-metag1", - "ctg_powsum": 727370, - "scaf_max": 517431, - "id": "nmdc:wfmgas-11-test001.1", - "scaf_powsum": 730816, - "execution_resource": "NERSC-Cori", - "contigs": 1705758, - "scaf_n_gt50K": 216, - "ctg_N50": 304732, - "name": "Assembly Activity for nmdc:mga0vx38", - "ctg_max": 517431, - "gc_std": 0.06928, - "contig_bp": 1168572354, - "ctg_L50": 785, - "scaf_l_gt50K": 20281644, - "scaf_L90": 322, - "started_at_time": "2021-08-05T14:48:51+00:00", - "ctg_L90": 322, - "scaf_bp": 1168611354, - "type": "nmdc:MetagenomeAssembly", - "scaf_L50": 789, - "scaffolds": 1702137, - "ended_at_time": "2021-09-15T10:13:20+00:00" - } -] diff --git a/tests/fixtures/metatranscriptome_assembly_set.json b/tests/fixtures/metatranscriptome_assembly_set.json deleted file mode 100644 index a0943f0d..00000000 --- a/tests/fixtures/metatranscriptome_assembly_set.json +++ /dev/null @@ -1,51 +0,0 @@ -[ - { - "has_input": [ - "nmdc:dobj-11-filteredreads1" - ], - "part_of": [ - "nmdc:omprc-11-metat1" 
- ], - "scaf_N50": 302542, - "ctg_logsum": 5909271, - "ctg_N90": 1315235, - "gc_avg": 0.60257, - "scaf_N90": 1312362, - "scaf_logsum": 5934400, - "scaf_pct_gt50K": 1.7355337, - "gap_pct": 0.00334, - "git_url": "https://github.com/microbiomedata/metaT_Assembly", - "version": "v0.0.1", - "has_output": [ - "nmdc:dobj-11-contigst1", - "nmdc:dobj-11-coverstatst1", - "nmdc:dobj-11-coverbamt1", - "nmdc:dobj-11-assemblyinfot1", - "nmdc:dobj-11-bamidxt1" - ], - "asm_score": 11.297, - "was_informed_by": "nmdc:omprc-11-metat1", - "ctg_powsum": 727370, - "scaf_max": 517431, - "id": "nmdc:wfmtas-11-test001.1", - "scaf_powsum": 730816, - "execution_resource": "NERSC-Cori", - "contigs": 1705758, - "scaf_n_gt50K": 216, - "ctg_N50": 304732, - "name": "Assembly Activity for metaT", - "ctg_max": 517431, - "gc_std": 0.06928, - "contig_bp": 1168572354, - "ctg_L50": 785, - "scaf_l_gt50K": 20281644, - "scaf_L90": 322, - "started_at_time": "2021-08-05T14:48:51+00:00", - "ctg_L90": 322, - "scaf_bp": 1168611354, - "type": "nmdc:MetatranscriptomeAssembly", - "scaf_L50": 789, - "scaffolds": 1702137, - "ended_at_time": "2021-09-15T10:13:20+00:00" - } -] \ No newline at end of file diff --git a/tests/fixtures/metatranscriptome_expression_analysis_set.json b/tests/fixtures/metatranscriptome_expression_analysis.json similarity index 93% rename from tests/fixtures/metatranscriptome_expression_analysis_set.json rename to tests/fixtures/metatranscriptome_expression_analysis.json index 486efce7..b70b1f86 100644 --- a/tests/fixtures/metatranscriptome_expression_analysis_set.json +++ b/tests/fixtures/metatranscriptome_expression_analysis.json @@ -17,9 +17,6 @@ "nmdc:dobj-11-expcountst1", "nmdc:dobj-11-expinfot1" ], - "part_of": [ - "nmdc:omprc-11-metat1" - ], "version": "v1.0.8" } ] \ No newline at end of file diff --git a/tests/fixtures/models/mags_analysis_record.json b/tests/fixtures/models/mags_analysis_record.json new file mode 100644 index 00000000..df222d83 --- /dev/null +++ 
b/tests/fixtures/models/mags_analysis_record.json @@ -0,0 +1,221 @@ +{ + "id": "nmdc:wfmag-11-g7msr323.1", + "type": "nmdc:MagsAnalysis", + "name": "Metagenome Assembled Genomes Analysis for nmdc:wfmag-11-g7msr323.1", + "git_url": "https://github.com/microbiomedata/metaMAGs", + "execution_resource": "NERSC-Perlmutter", + "was_informed_by": "nmdc:omprc-11-9cdxha98", + "has_input": [ + "nmdc:dobj-11-1x850k20", + "nmdc:dobj-11-fkj2kt47", + "nmdc:dobj-11-f9rnav80", + "nmdc:dobj-11-btqzf393", + "nmdc:dobj-11-hdty3m42", + "nmdc:dobj-11-0gk70187", + "nmdc:dobj-11-3mtmhf26", + "nmdc:dobj-11-7kfhf682", + "nmdc:dobj-11-9hjg8y84", + "nmdc:dobj-11-2x0wy902", + "nmdc:dobj-11-r0bx4g71", + "nmdc:dobj-11-7mj15p44", + "nmdc:dobj-11-r2zqpy26", + "nmdc:dobj-11-4k2bt072" + ], + "started_at_time": "2024-09-16T19:33:32.562412+00:00", + "ended_at_time": "2024-09-16T21:52:12.873101+00:00", + "version": "v1.3.10", + "has_output": [ + "nmdc:dobj-11-xvjz5h55", + "nmdc:dobj-11-85q1v678", + "nmdc:dobj-11-j5p58211", + "nmdc:dobj-11-ec2fqk35", + "nmdc:dobj-11-kg68h909" + ], + "binned_contig_num": 27214, + "mags_list": [ + { + "bin_name": "bins.40", + "number_of_contig": 44, + "completeness": 97.3, + "contamination": 3.38, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "MQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "Bacteria", + "gtdbtk_phylum": "Verrucomicrobiota", + "gtdbtk_class": "Verrucomicrobiae", + "gtdbtk_order": "Pedosphaerales", + "gtdbtk_family": "UBA11358", + "gtdbtk_genus": "UBA11358", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7_c1", + "nmdc:wfmgas-13-56028x05.1_9_c1", + "nmdc:wfmgas-13-56028x05.1_16_c1", + "nmdc:wfmgas-13-56028x05.1_20_c1", + "nmdc:wfmgas-13-56028x05.1_23_c1", + "nmdc:wfmgas-13-56028x05.1_27_c1", + "nmdc:wfmgas-13-56028x05.1_45_c1", + "nmdc:wfmgas-13-56028x05.1_55_c1", + "nmdc:wfmgas-13-56028x05.1_71_c1", + "nmdc:wfmgas-13-56028x05.1_79_c1", + "nmdc:wfmgas-13-56028x05.1_99_c1", 
+ "nmdc:wfmgas-13-56028x05.1_52_c2", + "nmdc:wfmgas-13-56028x05.1_127_c1", + "nmdc:wfmgas-13-56028x05.1_131_c1", + "nmdc:wfmgas-13-56028x05.1_137_c1", + "nmdc:wfmgas-13-56028x05.1_169_c1", + "nmdc:wfmgas-13-56028x05.1_200_c1", + "nmdc:wfmgas-13-56028x05.1_212_c1", + "nmdc:wfmgas-13-56028x05.1_223_c1", + "nmdc:wfmgas-13-56028x05.1_372_c1", + "nmdc:wfmgas-13-56028x05.1_393_c1", + "nmdc:wfmgas-13-56028x05.1_428_c1", + "nmdc:wfmgas-13-56028x05.1_52_c1", + "nmdc:wfmgas-13-56028x05.1_582_c1", + "nmdc:wfmgas-13-56028x05.1_706_c1", + "nmdc:wfmgas-13-56028x05.1_888_c1", + "nmdc:wfmgas-13-56028x05.1_912_c1", + "nmdc:wfmgas-13-56028x05.1_1268_c1", + "nmdc:wfmgas-13-56028x05.1_1271_c1", + "nmdc:wfmgas-13-56028x05.1_1492_c1", + "nmdc:wfmgas-13-56028x05.1_1494_c1", + "nmdc:wfmgas-13-56028x05.1_1604_c1", + "nmdc:wfmgas-13-56028x05.1_1627_c1", + "nmdc:wfmgas-13-56028x05.1_1888_c1", + "nmdc:wfmgas-13-56028x05.1_1938_c1", + "nmdc:wfmgas-13-56028x05.1_2944_c1", + "nmdc:wfmgas-13-56028x05.1_3261_c1", + "nmdc:wfmgas-13-56028x05.1_3477_c1", + "nmdc:wfmgas-13-56028x05.1_4194_c1", + "nmdc:wfmgas-13-56028x05.1_6257_c1", + "nmdc:wfmgas-13-56028x05.1_7589_c1", + "nmdc:wfmgas-13-56028x05.1_10469_c1", + "nmdc:wfmgas-13-56028x05.1_10553_c1", + "nmdc:wfmgas-13-56028x05.1_13792_c1" + ] + }, + { + "bin_name": "bins.9", + "number_of_contig": 92, + "completeness": 0.0, + "contamination": 0.0, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "LQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "null", + "gtdbtk_phylum": "null", + "gtdbtk_class": "null", + "gtdbtk_order": "null", + "gtdbtk_family": "null", + "gtdbtk_genus": "null", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7094_c1", + "nmdc:wfmgas-13-56028x05.1_9486_c1", + "nmdc:wfmgas-13-56028x05.1_9853_c1", + "nmdc:wfmgas-13-56028x05.1_10857_c1", + "nmdc:wfmgas-13-56028x05.1_11702_c1", + "nmdc:wfmgas-13-56028x05.1_12042_c1", + "nmdc:wfmgas-13-56028x05.1_14174_c1", + 
"nmdc:wfmgas-13-56028x05.1_14597_c1", + "nmdc:wfmgas-13-56028x05.1_16115_c1", + "nmdc:wfmgas-13-56028x05.1_16261_c1", + "nmdc:wfmgas-13-56028x05.1_16795_c1", + "nmdc:wfmgas-13-56028x05.1_16943_c1", + "nmdc:wfmgas-13-56028x05.1_17208_c1", + "nmdc:wfmgas-13-56028x05.1_17245_c1", + "nmdc:wfmgas-13-56028x05.1_17383_c1", + "nmdc:wfmgas-13-56028x05.1_17783_c1", + "nmdc:wfmgas-13-56028x05.1_18468_c1", + "nmdc:wfmgas-13-56028x05.1_18553_c1", + "nmdc:wfmgas-13-56028x05.1_18858_c1", + "nmdc:wfmgas-13-56028x05.1_19302_c1", + "nmdc:wfmgas-13-56028x05.1_19824_c1", + "nmdc:wfmgas-13-56028x05.1_20316_c1", + "nmdc:wfmgas-13-56028x05.1_20787_c1", + "nmdc:wfmgas-13-56028x05.1_21029_c1", + "nmdc:wfmgas-13-56028x05.1_21435_c1", + "nmdc:wfmgas-13-56028x05.1_21475_c1", + "nmdc:wfmgas-13-56028x05.1_21484_c1", + "nmdc:wfmgas-13-56028x05.1_21518_c1", + "nmdc:wfmgas-13-56028x05.1_21685_c1", + "nmdc:wfmgas-13-56028x05.1_21809_c1", + "nmdc:wfmgas-13-56028x05.1_21924_c1", + "nmdc:wfmgas-13-56028x05.1_21958_c1", + "nmdc:wfmgas-13-56028x05.1_22186_c1", + "nmdc:wfmgas-13-56028x05.1_22271_c1", + "nmdc:wfmgas-13-56028x05.1_22516_c1", + "nmdc:wfmgas-13-56028x05.1_22514_c1", + "nmdc:wfmgas-13-56028x05.1_22777_c1", + "nmdc:wfmgas-13-56028x05.1_23003_c1", + "nmdc:wfmgas-13-56028x05.1_23115_c1", + "nmdc:wfmgas-13-56028x05.1_23204_c1", + "nmdc:wfmgas-13-56028x05.1_23239_c1", + "nmdc:wfmgas-13-56028x05.1_23352_c1", + "nmdc:wfmgas-13-56028x05.1_23445_c1", + "nmdc:wfmgas-13-56028x05.1_23505_c1", + "nmdc:wfmgas-13-56028x05.1_23571_c1", + "nmdc:wfmgas-13-56028x05.1_24047_c1", + "nmdc:wfmgas-13-56028x05.1_24749_c1", + "nmdc:wfmgas-13-56028x05.1_24981_c1", + "nmdc:wfmgas-13-56028x05.1_25059_c1", + "nmdc:wfmgas-13-56028x05.1_25526_c1", + "nmdc:wfmgas-13-56028x05.1_26162_c1", + "nmdc:wfmgas-13-56028x05.1_26376_c1", + "nmdc:wfmgas-13-56028x05.1_26773_c1", + "nmdc:wfmgas-13-56028x05.1_26816_c1", + "nmdc:wfmgas-13-56028x05.1_26891_c1", + "nmdc:wfmgas-13-56028x05.1_27179_c1", + "nmdc:wfmgas-13-56028x05.1_27272_c1", + 
"nmdc:wfmgas-13-56028x05.1_27358_c1", + "nmdc:wfmgas-13-56028x05.1_27411_c1", + "nmdc:wfmgas-13-56028x05.1_27550_c1", + "nmdc:wfmgas-13-56028x05.1_28892_c1", + "nmdc:wfmgas-13-56028x05.1_29003_c1", + "nmdc:wfmgas-13-56028x05.1_29238_c1", + "nmdc:wfmgas-13-56028x05.1_29324_c1", + "nmdc:wfmgas-13-56028x05.1_29771_c1", + "nmdc:wfmgas-13-56028x05.1_29878_c1", + "nmdc:wfmgas-13-56028x05.1_30248_c1", + "nmdc:wfmgas-13-56028x05.1_30476_c1", + "nmdc:wfmgas-13-56028x05.1_30587_c1", + "nmdc:wfmgas-13-56028x05.1_31160_c1", + "nmdc:wfmgas-13-56028x05.1_31834_c1", + "nmdc:wfmgas-13-56028x05.1_31922_c1", + "nmdc:wfmgas-13-56028x05.1_31971_c1", + "nmdc:wfmgas-13-56028x05.1_32244_c1", + "nmdc:wfmgas-13-56028x05.1_32605_c1", + "nmdc:wfmgas-13-56028x05.1_32623_c1", + "nmdc:wfmgas-13-56028x05.1_32832_c1", + "nmdc:wfmgas-13-56028x05.1_33068_c1", + "nmdc:wfmgas-13-56028x05.1_33334_c1", + "nmdc:wfmgas-13-56028x05.1_33438_c1", + "nmdc:wfmgas-13-56028x05.1_33855_c1", + "nmdc:wfmgas-13-56028x05.1_34035_c1", + "nmdc:wfmgas-13-56028x05.1_34120_c1", + "nmdc:wfmgas-13-56028x05.1_34140_c1", + "nmdc:wfmgas-13-56028x05.1_34133_c1", + "nmdc:wfmgas-13-56028x05.1_34177_c1", + "nmdc:wfmgas-13-56028x05.1_34481_c1", + "nmdc:wfmgas-13-56028x05.1_34728_c1", + "nmdc:wfmgas-13-56028x05.1_34843_c1", + "nmdc:wfmgas-13-56028x05.1_35665_c1", + "nmdc:wfmgas-13-56028x05.1_35772_c1", + "nmdc:wfmgas-13-56028x05.1_35995_c1" + ] + } + ], + "too_short_contig_num": 2005162, + "input_contig_num": 2273412, + "unbinned_contig_num": 241036, + "low_depth_contig_num": 0 +} \ No newline at end of file diff --git a/tests/fixtures/models/metagenome_annotation_record.json b/tests/fixtures/models/metagenome_annotation_record.json new file mode 100644 index 00000000..222a384c --- /dev/null +++ b/tests/fixtures/models/metagenome_annotation_record.json @@ -0,0 +1,41 @@ +{ + "id": "nmdc:wfmgan-11-009f3582.1", + "name": "Metagenome Annotation Analysis Activity for nmdc:wfmgan-11-009f3582.1", + "started_at_time": 
"2024-09-03T19:24:35.443721+00:00", + "ended_at_time": "2024-09-04T20:05:09.774239+00:00", + "was_informed_by": "nmdc:omprc-11-24aket55", + "execution_resource": "NERSC-Perlmutter", + "git_url": "https://github.com/microbiomedata/mg_annotation", + "has_input": [ + "nmdc:dobj-11-mmtw5j72" + ], + "type": "nmdc:MetagenomeAnnotation", + "has_output": [ + "nmdc:dobj-11-pthb2b31", + "nmdc:dobj-11-2fd45p27", + "nmdc:dobj-11-ht0ats03", + "nmdc:dobj-11-sevdef93", + "nmdc:dobj-11-dadfbk65", + "nmdc:dobj-11-2r9dh888", + "nmdc:dobj-11-hd7fse31", + "nmdc:dobj-11-8zbtsn06", + "nmdc:dobj-11-sbxx9k71", + "nmdc:dobj-11-9snwce53", + "nmdc:dobj-11-qb62ef07", + "nmdc:dobj-11-9k06j893", + "nmdc:dobj-11-6hm85g54", + "nmdc:dobj-11-pgp0fr06", + "nmdc:dobj-11-a9m5d764", + "nmdc:dobj-11-rmypsf52", + "nmdc:dobj-11-13mdyw37", + "nmdc:dobj-11-0apj5620", + "nmdc:dobj-11-kh26pk74", + "nmdc:dobj-11-zyh1nx46", + "nmdc:dobj-11-d6gdnm48", + "nmdc:dobj-11-7j8j6733", + "nmdc:dobj-11-s13ejf37", + "nmdc:dobj-11-hpn4d109", + "nmdc:dobj-11-sfanhn77" + ], + "version": "v1.1.0" + } \ No newline at end of file diff --git a/tests/fixtures/models/metagenome_assembly_record.json b/tests/fixtures/models/metagenome_assembly_record.json new file mode 100644 index 00000000..b8bcf2cb --- /dev/null +++ b/tests/fixtures/models/metagenome_assembly_record.json @@ -0,0 +1,22 @@ +{ + "id": "nmdc:wfmgas-11-0080kf19.1", + "name": "Metagenome Assembly Activity for nmdc:wfmgas-11-0080kf19.1", + "started_at_time": "2023-09-05T18:02:36.755687+00:00", + "ended_at_time": "2023-09-05T19:46:42.649106+00:00", + "was_informed_by": "nmdc:omprc-11-c82tqn53", + "execution_resource": "NERSC-Perlmutter", + "git_url": "https://github.com/microbiomedata/metaAssembly", + "has_input": [ + "nmdc:dobj-11-sgpgmp62" + ], + "has_output": [ + "nmdc:dobj-11-dtnyvj29", + "nmdc:dobj-11-4hpkwf43", + "nmdc:dobj-11-pyhh1b53", + "nmdc:dobj-11-3qp71339", + "nmdc:dobj-11-0mw8sn13", + "nmdc:dobj-11-a898mz04" + ], + "type": "nmdc:MetagenomeAssembly", + 
"version": "v1.0.3" + } \ No newline at end of file diff --git a/tests/fixtures/models/metatranscriptome_annotation_record.json b/tests/fixtures/models/metatranscriptome_annotation_record.json new file mode 100644 index 00000000..3e54d61c --- /dev/null +++ b/tests/fixtures/models/metatranscriptome_annotation_record.json @@ -0,0 +1,41 @@ +{ + "id": "nmdc:wfmtan-11-009f3582.1", + "name": "Metatranscriptome Annotation Analysis Activity for nmdc:wfmtan-11-009f3582.1", + "started_at_time": "2024-09-03T19:24:35.443721+00:00", + "ended_at_time": "2024-09-04T20:05:09.774239+00:00", + "was_informed_by": "nmdc:omprc-11-24aket55", + "execution_resource": "NERSC-Perlmutter", + "git_url": "https://github.com/microbiomedata/mg_annotation", + "has_input": [ + "nmdc:dobj-11-mmtw5j72" + ], + "type": "nmdc:MetatranscriptomeAnnotation", + "has_output": [ + "nmdc:dobj-11-pthb2b31", + "nmdc:dobj-11-2fd45p27", + "nmdc:dobj-11-ht0ats03", + "nmdc:dobj-11-sevdef93", + "nmdc:dobj-11-dadfbk65", + "nmdc:dobj-11-2r9dh888", + "nmdc:dobj-11-hd7fse31", + "nmdc:dobj-11-8zbtsn06", + "nmdc:dobj-11-sbxx9k71", + "nmdc:dobj-11-9snwce53", + "nmdc:dobj-11-qb62ef07", + "nmdc:dobj-11-9k06j893", + "nmdc:dobj-11-6hm85g54", + "nmdc:dobj-11-pgp0fr06", + "nmdc:dobj-11-a9m5d764", + "nmdc:dobj-11-rmypsf52", + "nmdc:dobj-11-13mdyw37", + "nmdc:dobj-11-0apj5620", + "nmdc:dobj-11-kh26pk74", + "nmdc:dobj-11-zyh1nx46", + "nmdc:dobj-11-d6gdnm48", + "nmdc:dobj-11-7j8j6733", + "nmdc:dobj-11-s13ejf37", + "nmdc:dobj-11-hpn4d109", + "nmdc:dobj-11-sfanhn77" + ], + "version": "v1.1.4" + } \ No newline at end of file diff --git a/tests/fixtures/models/metatranscriptome_assembly_record.json b/tests/fixtures/models/metatranscriptome_assembly_record.json new file mode 100644 index 00000000..5b8f6156 --- /dev/null +++ b/tests/fixtures/models/metatranscriptome_assembly_record.json @@ -0,0 +1,22 @@ +{ + "id": "nmdc:wfmtas-11-0080kf19.1", + "name": "Metatranscriptome Assembly Activity for nmdc:wfmas-11-0080kf19.1", + 
"started_at_time": "2023-09-05T18:02:36.755687+00:00", + "ended_at_time": "2023-09-05T19:46:42.649106+00:00", + "was_informed_by": "nmdc:omprc-11-c82tqn53", + "execution_resource": "NERSC-Perlmutter", + "git_url": "https://github.com/microbiomedata/metaT_Assembly", + "has_input": [ + "nmdc:dobj-11-sgpgmp62" + ], + "has_output": [ + "nmdc:dobj-11-dtnyvj29", + "nmdc:dobj-11-4hpkwf43", + "nmdc:dobj-11-pyhh1b53", + "nmdc:dobj-11-3qp71339", + "nmdc:dobj-11-0mw8sn13", + "nmdc:dobj-11-a898mz04" + ], + "type": "nmdc:MetatranscriptomeAssembly", + "version": "v0.0.2" + } \ No newline at end of file diff --git a/tests/fixtures/models/metatranscriptome_expression_analysis_record.json b/tests/fixtures/models/metatranscriptome_expression_analysis_record.json new file mode 100644 index 00000000..d922d96e --- /dev/null +++ b/tests/fixtures/models/metatranscriptome_expression_analysis_record.json @@ -0,0 +1,20 @@ +{ + "id": "nmdc:wfmtex-11-metat1.1", + "name": "Metatranscriptome Expression Analysis for nmdc:wfmtex-11-metat1.1", + "started_at_time": "2024-04-11T20:51:15.535533+00:00", + "ended_at_time": "2024-04-11T23:39:28.659534+00:00", + "was_informed_by": "nmdc:omprc-11-metat1", + "execution_resource": "NERSC-Perlmutter", + "git_url": "https://github.com/microbiomedata/metaT_ReadCounts", + "has_input": [ + "nmdc:dobj-11-functionalt1", + "nmdc:dobj-11-contigmappingt1", + "nmdc:dobj-11-coverbamt1" + ], + "type": "nmdc:MetatranscriptomeExpressionAnalysis", + "has_output": [ + "nmdc:dobj-11-expcountst1", + "nmdc:dobj-11-expinfot1" + ], + "version": "v1.0.8" + } \ No newline at end of file diff --git a/tests/fixtures/models/nucleotide_sequencing_record.json b/tests/fixtures/models/nucleotide_sequencing_record.json new file mode 100644 index 00000000..e4f2a433 --- /dev/null +++ b/tests/fixtures/models/nucleotide_sequencing_record.json @@ -0,0 +1,28 @@ +{ + "id": "nmdc:omprc-11-0011q207", + "name": "Root microbial communities from poplar common garden site in Clatskanie, Oregon, USA - 
BESC-847-CL1_28_5 endosphere", + "has_input": [ + "nmdc:bsm-11-ta8dt754" + ], + "add_date": "2021-08-20T00:00:00", + "mod_date": "2021-08-20T00:00:00", + "ncbi_project_name": "Root microbial communities from poplar common garden site in Clatskanie, Oregon, USA - BESC-847-CL1_28_5 endosphere", + "principal_investigator": { + "has_raw_value": "Mitchel Doktycz", + "email": "doktyczmj@ornl.gov", + "name": "Mitchel Doktycz", + "type": "nmdc:PersonValue" + }, + "processing_institution": "JGI", + "type": "nmdc:NucleotideSequencing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0587799" + ], + "analyte_category": "metagenome", + "associated_studies": [ + "nmdc:sty-11-r2h77870" + ], + "instrument_used": [ + "nmdc:inst-14-mr4r2w09" + ] + } \ No newline at end of file diff --git a/tests/fixtures/models/read_based_taxonomy_analysis_record.json b/tests/fixtures/models/read_based_taxonomy_analysis_record.json new file mode 100644 index 00000000..999a563c --- /dev/null +++ b/tests/fixtures/models/read_based_taxonomy_analysis_record.json @@ -0,0 +1,25 @@ +{ + "id": "nmdc:wfrbt-11-00qkc311.1", + "name": "Readbased Taxonomy Analysis Activity for nmdc:wfrbt-11-00qkc311.1", + "started_at_time": "2023-03-16T19:32:00.300673+00:00", + "ended_at_time": "2023-03-17T15:24:11.217325+00:00", + "was_informed_by": "nmdc:omprc-11-rcy9x023", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadbasedAnalysis", + "has_input": [ + "nmdc:dobj-11-bg80tg92" + ], + "has_output": [ + "nmdc:dobj-11-r3fgct86", + "nmdc:dobj-11-s71n7k22", + "nmdc:dobj-11-z7scy855", + "nmdc:dobj-11-et8qjk67", + "nmdc:dobj-11-cs5ds528", + "nmdc:dobj-11-81g47g12", + "nmdc:dobj-11-2gbw5832", + "nmdc:dobj-11-vcn8z980", + "nmdc:dobj-11-7d3pk791" + ], + "type": "nmdc:ReadBasedTaxonomyAnalysis", + "version": "v1.0.5-beta" + } \ No newline at end of file diff --git a/tests/fixtures/models/read_qc_analysis_record.json b/tests/fixtures/models/read_qc_analysis_record.json new file mode 100644 
index 00000000..e1e86d3e --- /dev/null +++ b/tests/fixtures/models/read_qc_analysis_record.json @@ -0,0 +1,26 @@ +{ + "id": "nmdc:wfrqc-11-014wn728.1", + "name": "Read QC Activity for nmdc:omprc-11-2e3pne24", + "started_at_time": "2021-12-01T01:47:43+00:00", + "ended_at_time": "2021-12-12T03:08:24+00:00", + "was_informed_by": "nmdc:omprc-11-2e3pne24", + "execution_resource": "NERSC-Cori", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-w9xah552" + ], + "has_output": [ + "nmdc:dobj-11-rdwend10", + "nmdc:dobj-11-fk0d5b40" + ], + "type": "nmdc:ReadQcAnalysis", + "version": "1.0.2", + "input_read_count": 179238768, + "output_read_count": 178701256, + "input_read_bases": 27065053968, + "output_read_bases": 26698546624, + "alternative_identifiers": [ + "nmdc:wfrqc-11-014wn728.1.1", + "nmdc:wfrqc-11-014wn728.1.1.1" + ] + } \ No newline at end of file diff --git a/tests/fixtures/new_state_job.json b/tests/fixtures/new_state_job.json new file mode 100644 index 00000000..839dd828 --- /dev/null +++ b/tests/fixtures/new_state_job.json @@ -0,0 +1,215 @@ +{ + "type": "MAGs: v1.3.10", + "cromwell_jobid": "9492a397-eb30-472b-9d3b-abc123456789", + "nmdc_jobid": "nmdc:66cf64b6-7462-11ef-8b84-abc123456789", + "conf": { + "git_repo": "https://github.com/microbiomedata/metaMAGs", + "release": "v1.3.10", + "wdl": "mbin_nmdc.wdl", + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "activity_set": "mags_activity_set", + "was_informed_by": "nmdc:omprc-11-9cdxha98", + "trigger_activity": "nmdc:wfmgan-11-jv8kx789.1", + "iteration": 1, + "input_prefix": "nmdc_mags", + "inputs": { + "proj": "nmdc:wfmag-11-g7msr323.1", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "gff_file": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "crispr_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "map_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-1x850k20", + "name": "nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "description": "Assembly contigs (remapped) for nmdc:wfmgan-11-jv8kx789.1", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "md5_checksum": "6debed079383eeca2045ce23b0576607", + "file_size_bytes": 2084209623, + "data_object_type": "Assembly Contigs" + }, + { + "id": "nmdc:dobj-11-fkj2kt47", + "name": "nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-9cdxha98", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "md5_checksum": "88ec004bd037a3820060427098798666", + "file_size_bytes": 15704979428, + "data_object_type": "Assembly Coverage BAM" + }, + { + "id": "nmdc:dobj-11-f9rnav80", + "name": "nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "description": "Functional Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "md5_checksum": "349cae9b4fe62bb910f08a183e57b475", + "file_size_bytes": 1320869282, + "data_object_type": "Functional Annotation GFF" + }, + { + "id": "nmdc:dobj-11-btqzf393", + "name": "nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "description": "FASTA Amino Acid File for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "md5_checksum": "292eae73923605dae2ef9f5d582e4603", + "file_size_bytes": 1075716574, + "data_object_type": "Annotation Amino Acid FASTA" + }, + { + "id": "nmdc:dobj-11-hdty3m42", + "name": "nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "description": "COGs for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "md5_checksum": "c4d1121c1ceb1229afb7190d23553003", + "file_size_bytes": 712459544, + 
"data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF" + }, + { + "id": "nmdc:dobj-11-0gk70187", + "name": "nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "description": "EC Annotations for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "md5_checksum": "84cf22f39532e1bd001bea8425735a82", + "file_size_bytes": 116429630, + "data_object_type": "Annotation Enzyme Commission" + }, + { + "id": "nmdc:dobj-11-3mtmhf26", + "name": "nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "description": "KEGG Orthology for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "md5_checksum": "17d699df17c97fc28796a198cf40a328", + "file_size_bytes": 169182276, + "data_object_type": "Annotation KEGG Orthology" + }, + { + "id": "nmdc:dobj-11-7kfhf682", + "name": "nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "description": "Pfam Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "md5_checksum": "23c33758dc138e1af0f39fa1f3ca07db", + "file_size_bytes": 602929841, + "data_object_type": "Pfam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-9hjg8y84", + "name": "nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "description": "TIGRFam for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "md5_checksum": "bbfded219e0b359602725c9efb4f0c54", + "file_size_bytes": 61788991, + "data_object_type": "TIGRFam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-2x0wy902", + "name": "nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "description": "Crispr Terms for nmdc:wfmgan-11-jv8kx789.1", + "url": 
"https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "md5_checksum": "9d2255a63e39552328c4da20ccf2bb3f", + "file_size_bytes": 142989, + "data_object_type": "Crispr Terms" + }, + { + "id": "nmdc:dobj-11-r0bx4g71", + "name": "nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "description": "Product names for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "md5_checksum": "6f1325b2f8dee9b2a75598fb9645c43d", + "file_size_bytes": 401118634, + "data_object_type": "Product Names" + }, + { + "id": "nmdc:dobj-11-7mj15p44", + "name": "nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "description": "Gene Phylogeny for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "md5_checksum": "037aee803f1b81ac5ac1bccb9a18527d", + "file_size_bytes": 748420652, + "data_object_type": "Gene Phylogeny tsv" + }, + { + "id": "nmdc:dobj-11-r2zqpy26", + "name": "nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "description": "Scaffold Lineage tsv for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "md5_checksum": "efdce9771cdda8bd8548e44ef6d1d3a3", + "file_size_bytes": 503898615, + "data_object_type": "Scaffold Lineage tsv" + }, + { + "id": "nmdc:dobj-11-4k2bt072", + "name": "nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "description": "Contig mappings file for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "md5_checksum": "1056a6ef48ce9124de0828ee85246e65", + "file_size_bytes": 250129248, + "data_object_type": 
"Contig Mapping File" + } + ], + "activity": { + "name": "Metagenome Assembled Genomes Analysis Activity for {id}", + "type": "nmdc:MagsAnalysisActivity", + "binned_contig_num": "{outputs.final_stats_json.binned_contig_num}", + "input_contig_num": "{outputs.final_stats_json.input_contig_num}", + "low_depth_contig_num": "{outputs.final_stats_json.low_depth_contig_num}", + "mags_list": "{outputs.final_stats_json.mags_list}", + "too_short_contig_num": "{outputs.final_stats_json.too_short_contig_num}", + "unbinned_contig_num": "{outputs.final_stats_json.unbinned_contig_num}" + }, + "outputs": [ + { + "output": "final_checkm", + "data_object_type": "CheckM Statistics", + "description": "CheckM for {id}", + "name": "CheckM statistics report", + "id": "nmdc:dobj-11-xvjz5h55" + }, + { + "output": "final_hqmq_bins_zip", + "data_object_type": "Metagenome Bins", + "description": "Metagenome Bins for {id}", + "name": "Metagenome bin tarfiles archive", + "id": "nmdc:dobj-11-85q1v678" + }, + { + "output": "final_gtdbtk_bac_summary", + "data_object_type": "GTDBTK Bacterial Summary", + "description": "Bacterial Summary for {id}", + "name": "GTDBTK bacterial summary", + "id": "nmdc:dobj-11-j5p58211" + }, + { + "output": "final_gtdbtk_ar_summary", + "data_object_type": "GTDBTK Archaeal Summary", + "description": "Archaeal Summary for {id}", + "name": "GTDBTK archaeal summary", + "suffix": "_gtdbtk.ar122.summary.tsv", + "id": "nmdc:dobj-11-ec2fqk35" + }, + { + "output": "mags_version", + "data_object_type": "Metagenome Bins Info File", + "description": "Metagenome Bins Info File for {id}", + "name": "Metagenome Bins Info File", + "id": "nmdc:dobj-11-kg68h909" + } + ] + }, + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "last_status": "Failed", + "done": false, + "failed_count": 1, + "start": "2024-09-16T19:33:32.562412+00:00", + "end": "2024-09-16T21:52:12.873101+00:00" + } \ No newline at end of file diff --git a/tests/fixtures/nmdc_api/unsubmitted_job.json 
b/tests/fixtures/nmdc_api/unsubmitted_job.json new file mode 100644 index 00000000..f0c69355 --- /dev/null +++ b/tests/fixtures/nmdc_api/unsubmitted_job.json @@ -0,0 +1,220 @@ +{ + "workflow" : { + "id" : "Metagenome Annotation: v1.1.0" + }, + "id" : "nmdc:e18a4870-425c-11ef-8cc6-52b18d4509d1", + "created_at" : "2024-07-15T03:46:56.000+0000", + "config" : { + "git_repo" : "https://github.com/microbiomedata/mg_annotation", + "release" : "v1.1.0", + "wdl" : "annotation_full.wdl", + "activity_id" : "nmdc:wfmgan-11-dvvh0237.1", + "activity_set" : "metagenome_annotation_activity_set", + "was_informed_by" : "nmdc:omprc-13-zd99ps92", + "trigger_activity" : "nmdc:wfmgas-13-m54qtf91.1", + "iteration" : 1, + "input_prefix" : "annotation", + "inputs" : { + "input_file" : "https://data.microbiomedata.org/data/nmdc:omprc-13-zd99ps92/nmdc:wfmgas-13-m54qtf91.1/nmdc_wfmgas-13-m54qtf91.1_contigs.fna", + "imgap_project_id" : "scaffold", + "proj" : "nmdc:wfmgan-11-dvvh0237.1" + }, + "input_data_objects" : [ + { + "id" : "nmdc:dobj-13-8ytaz779", + "name" : "nmdc_wfmgas-13-m54qtf91.1_contigs.fna", + "description" : "Assembled contigs fasta for nmdc:omprc-13-zd99ps92", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-13-zd99ps92/nmdc:wfmgas-13-m54qtf91.1/nmdc_wfmgas-13-m54qtf91.1_contigs.fna", + "md5_checksum" : "8483663a943ff4c0fc0249353676bfc1", + "file_size_bytes" : 95957530, + "data_object_type" : "Assembly Contigs" + } + ], + "activity" : { + "name" : "Metagenome Annotation Analysis Activity for {id}", + "type" : "nmdc:MetagenomeAnnotationActivity" + }, + "outputs" : [ + { + "output" : "proteins_faa", + "data_object_type" : "Annotation Amino Acid FASTA", + "description" : "FASTA Amino Acid File for {id}", + "name" : "FASTA amino acid file for annotated proteins", + "id" : "nmdc:dobj-11-tt8ykk73" + }, + { + "output" : "structural_gff", + "data_object_type" : "Structural Annotation GFF", + "description" : "Structural Annotation for {id}", + "name" : "GFF3 format file with 
structural annotations", + "id" : "nmdc:dobj-11-xh82sm39" + }, + { + "output" : "functional_gff", + "data_object_type" : "Functional Annotation GFF", + "description" : "Functional Annotation for {id}", + "name" : "GFF3 format file with functional annotations", + "id" : "nmdc:dobj-11-xxffzc65" + }, + { + "output" : "ko_tsv", + "data_object_type" : "Annotation KEGG Orthology", + "description" : "KEGG Orthology for {id}", + "name" : "Tab delimited file for KO annotation", + "id" : "nmdc:dobj-11-0q21fa03" + }, + { + "output" : "ec_tsv", + "data_object_type" : "Annotation Enzyme Commission", + "description" : "EC Annotations for {id}", + "name" : "Tab delimited file for EC annotation", + "suffix" : "_ec.tsv", + "id" : "nmdc:dobj-11-hq2jz695" + }, + { + "output" : "lineage_tsv", + "data_object_type" : "Scaffold Lineage tsv", + "description" : "Scaffold Lineage tsv for {id}", + "name" : "Phylogeny at the scaffold level", + "suffix" : "_scaffold_lineage.tsv", + "id" : "nmdc:dobj-11-91b6k454" + }, + { + "output" : "cog_gff", + "data_object_type" : "Clusters of Orthologous Groups (COG) Annotation GFF", + "description" : "COGs for {id}", + "name" : "GFF3 format file with COGs", + "id" : "nmdc:dobj-11-wsjkk251" + }, + { + "output" : "pfam_gff", + "data_object_type" : "Pfam Annotation GFF", + "description" : "Pfam Annotation for {id}", + "name" : "GFF3 format file with Pfam", + "id" : "nmdc:dobj-11-7stjsq74" + }, + { + "output" : "tigrfam_gff", + "data_object_type" : "TIGRFam Annotation GFF", + "description" : "TIGRFam for {id}", + "name" : "GFF3 format file with TIGRfam", + "id" : "nmdc:dobj-11-w59s2397" + }, + { + "output" : "smart_gff", + "data_object_type" : "SMART Annotation GFF", + "description" : "SMART Annotations for {id}", + "name" : "GFF3 format file with SMART", + "id" : "nmdc:dobj-11-9d1sb298" + }, + { + "output" : "supfam_gff", + "data_object_type" : "SUPERFam Annotation GFF", + "description" : "SUPERFam Annotations for {id}", + "name" : "GFF3 format file with 
SUPERFam", + "id" : "nmdc:dobj-11-rsffjr58" + }, + { + "output" : "cath_funfam_gff", + "data_object_type" : "CATH FunFams (Functional Families) Annotation GFF", + "description" : "CATH FunFams for {id}", + "name" : "GFF3 format file with CATH FunFams", + "id" : "nmdc:dobj-11-7kp0zp78" + }, + { + "output" : "crt_gff", + "data_object_type" : "CRT Annotation GFF", + "description" : "CRT Annotations for {id}", + "name" : "GFF3 format file with CRT", + "id" : "nmdc:dobj-11-xq6ywj49" + }, + { + "output" : "genemark_gff", + "data_object_type" : "Genemark Annotation GFF", + "description" : "Genemark Annotations for {id}", + "name" : "GFF3 format file with Genemark", + "id" : "nmdc:dobj-11-2fm8f997" + }, + { + "output" : "prodigal_gff", + "data_object_type" : "Prodigal Annotation GFF", + "description" : "Prodigal Annotations {id}", + "name" : "GFF3 format file with Prodigal", + "id" : "nmdc:dobj-11-8tjfg176" + }, + { + "output" : "trna_gff", + "data_object_type" : "TRNA Annotation GFF", + "description" : "TRNA Annotations {id}", + "name" : "GFF3 format file with TRNA", + "id" : "nmdc:dobj-11-zwd23452" + }, + { + "output" : "final_rfam_gff", + "data_object_type" : "RFAM Annotation GFF", + "description" : "RFAM Annotations for {id}", + "name" : "GFF3 format file with RFAM", + "id" : "nmdc:dobj-11-m8wsj755" + }, + { + "output" : "ko_ec_gff", + "data_object_type" : "KO_EC Annotation GFF", + "description" : "KO_EC Annotations for {id}", + "name" : "GFF3 format file with KO_EC", + "id" : "nmdc:dobj-11-4mqqx605" + }, + { + "output" : "product_names_tsv", + "data_object_type" : "Product Names", + "description" : "Product names for {id}", + "name" : "Product names file", + "id" : "nmdc:dobj-11-wvhhv202" + }, + { + "output" : "gene_phylogeny_tsv", + "data_object_type" : "Gene Phylogeny tsv", + "description" : "Gene Phylogeny for {id}", + "name" : "Gene Phylogeny file", + "id" : "nmdc:dobj-11-k3q61q69" + }, + { + "output" : "crt_crisprs", + "data_object_type" : "Crispr Terms", + 
"description" : "Crispr Terms for {id}", + "name" : "Crispr Terms", + "id" : "nmdc:dobj-11-nhgfya98" + }, + { + "output" : "stats_tsv", + "data_object_type" : "Annotation Statistics", + "description" : "Annotation Stats for {id}", + "name" : "Annotation statistics report", + "id" : "nmdc:dobj-11-h80q9f77" + }, + { + "output" : "renamed_fasta", + "name" : "Renamed assembly contigs fasta", + "data_object_type" : "Assembly Contigs", + "description" : "Assembly contigs (remapped) for {id}", + "id" : "nmdc:dobj-11-hdbvxx08" + }, + { + "output" : "map_file", + "data_object_type" : "Contig Mapping File", + "description" : "Contig mappings file for {id}", + "name" : "Contig mappings between contigs and scaffolds", + "suffix" : "_contig_names_mapping.tsv", + "optional" : true, + "id" : "nmdc:dobj-11-65qdwa51" + }, + { + "output" : "imgap_version", + "data_object_type" : "Annotation Info File", + "description" : "Annotation info for {id}", + "name" : "File containing annotation info", + "id" : "nmdc:dobj-11-ps99c760" + } + ] + }, + "claims" : [] +} \ No newline at end of file diff --git a/tests/fixtures/omics_processing_set.json b/tests/fixtures/nmdc_db/data_generation_set.json similarity index 60% rename from tests/fixtures/omics_processing_set.json rename to tests/fixtures/nmdc_db/data_generation_set.json index 30c6c48e..4aa68d06 100644 --- a/tests/fixtures/omics_processing_set.json +++ b/tests/fixtures/nmdc_db/data_generation_set.json @@ -9,24 +9,22 @@ "nmdc:dobj-11-rawreads1", "nmdc:dobj-11-rawreads2" ], - "instrument_name" : "Illumina NovaSeq", - "omics_type" : { - "has_raw_value" : "Metagenome" - }, - "part_of" : [ + "analyte_category": "metagenome", + "associated_studies" : [ "nmdc:sty-11-test001" ], + "processing_institution": "JGI", "principal_investigator" : { "has_raw_value" : "PI Name", "email" : "pi_name@example.com", - "name" : "PI Name" + "name" : "PI Name", + "type": "nmdc:PersonValue" }, - "type" : "nmdc:OmicsProcessing" + "type" : 
"nmdc:NucleotideSequencing" }, { "id" : "nmdc:omprc-11-metat1", "name" : "Test Metatranscriptome Processing", - "description" : "Metatranscriptome description", "has_input" : [ "nmdc:bsm-11-cnc9ww90" ], @@ -34,23 +32,23 @@ "nmdc:dobj-11-rawreads3", "nmdc:dobj-11-rawreads4" ], - "part_of" : [ + "associated_studies" : [ "nmdc:sty-11-test002" ], "add_date" : "2018-07-12", "mod_date" : "2021-06-18", "ncbi_project_name" : "NCBI Project Name", - "omics_type" : { - "has_raw_value" : "Metatranscriptome" - }, - "principal_investigator" : { - "has_raw_value" : "Another PI Name" - }, - "processing_institution" : "JGI", - "type" : "nmdc:OmicsProcessing", + "analyte_category": "metatranscriptome", + "processing_institution": "JGI", + "principal_investigator": { + "has_raw_value": "Investigator Name", + "email": "test@example.org", + "name": "Investigator Name", + "type": "nmdc:PersonValue" + }, + "type" : "nmdc:NucleotideSequencing", "gold_sequencing_project_identifiers" : [ "gold:Gp0324008" ] } ] - diff --git a/tests/fixtures/data_object_set.json b/tests/fixtures/nmdc_db/data_object_set.json similarity index 100% rename from tests/fixtures/data_object_set.json rename to tests/fixtures/nmdc_db/data_object_set.json diff --git a/tests/fixtures/nmdc_db/jobs.json b/tests/fixtures/nmdc_db/jobs.json new file mode 100644 index 00000000..4525c1d9 --- /dev/null +++ b/tests/fixtures/nmdc_db/jobs.json @@ -0,0 +1,228 @@ +[ + { + "workflow" : { + "id" : "Metagenome Annotation: v1.1.0" + }, + "id" : "nmdc:e18a4870-425c-11ef-8cc6-52b18d4509d1", + "created_at" : "2024-07-15T03:46:56.000+0000", + "config" : { + "git_repo" : "https://github.com/microbiomedata/mg_annotation", + "release" : "v1.1.0", + "wdl" : "annotation_full.wdl", + "activity_id" : "nmdc:wfmgan-11-dvvh0237.1", + "activity_set" : "metagenome_annotation_activity_set", + "was_informed_by" : "nmdc:omprc-13-zd99ps92", + "trigger_activity" : "nmdc:wfmgas-13-m54qtf91.1", + "iteration" : 1, + "input_prefix" : "annotation", + "inputs" : 
{ + "input_file" : "https://data.microbiomedata.org/data/nmdc:omprc-13-zd99ps92/nmdc:wfmgas-13-m54qtf91.1/nmdc_wfmgas-13-m54qtf91.1_contigs.fna", + "imgap_project_id" : "scaffold", + "proj" : "nmdc:wfmgan-11-dvvh0237.1" + }, + "input_data_objects" : [ + { + "id" : "nmdc:dobj-13-8ytaz779", + "name" : "nmdc_wfmgas-13-m54qtf91.1_contigs.fna", + "description" : "Assembled contigs fasta for nmdc:omprc-13-zd99ps92", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-13-zd99ps92/nmdc:wfmgas-13-m54qtf91.1/nmdc_wfmgas-13-m54qtf91.1_contigs.fna", + "md5_checksum" : "8483663a943ff4c0fc0249353676bfc1", + "file_size_bytes" : 95957530, + "data_object_type" : "Assembly Contigs" + } + ], + "activity" : { + "name" : "Metagenome Annotation Analysis Activity for {id}", + "type" : "nmdc:MetagenomeAnnotationActivity" + }, + "outputs" : [ + { + "output" : "proteins_faa", + "data_object_type" : "Annotation Amino Acid FASTA", + "description" : "FASTA Amino Acid File for {id}", + "name" : "FASTA amino acid file for annotated proteins", + "id" : "nmdc:dobj-11-tt8ykk73" + }, + { + "output" : "structural_gff", + "data_object_type" : "Structural Annotation GFF", + "description" : "Structural Annotation for {id}", + "name" : "GFF3 format file with structural annotations", + "id" : "nmdc:dobj-11-xh82sm39" + }, + { + "output" : "functional_gff", + "data_object_type" : "Functional Annotation GFF", + "description" : "Functional Annotation for {id}", + "name" : "GFF3 format file with functional annotations", + "id" : "nmdc:dobj-11-xxffzc65" + }, + { + "output" : "ko_tsv", + "data_object_type" : "Annotation KEGG Orthology", + "description" : "KEGG Orthology for {id}", + "name" : "Tab delimited file for KO annotation", + "id" : "nmdc:dobj-11-0q21fa03" + }, + { + "output" : "ec_tsv", + "data_object_type" : "Annotation Enzyme Commission", + "description" : "EC Annotations for {id}", + "name" : "Tab delimited file for EC annotation", + "suffix" : "_ec.tsv", + "id" : "nmdc:dobj-11-hq2jz695" + }, + 
{ + "output" : "lineage_tsv", + "data_object_type" : "Scaffold Lineage tsv", + "description" : "Scaffold Lineage tsv for {id}", + "name" : "Phylogeny at the scaffold level", + "suffix" : "_scaffold_lineage.tsv", + "id" : "nmdc:dobj-11-91b6k454" + }, + { + "output" : "cog_gff", + "data_object_type" : "Clusters of Orthologous Groups (COG) Annotation GFF", + "description" : "COGs for {id}", + "name" : "GFF3 format file with COGs", + "id" : "nmdc:dobj-11-wsjkk251" + }, + { + "output" : "pfam_gff", + "data_object_type" : "Pfam Annotation GFF", + "description" : "Pfam Annotation for {id}", + "name" : "GFF3 format file with Pfam", + "id" : "nmdc:dobj-11-7stjsq74" + }, + { + "output" : "tigrfam_gff", + "data_object_type" : "TIGRFam Annotation GFF", + "description" : "TIGRFam for {id}", + "name" : "GFF3 format file with TIGRfam", + "id" : "nmdc:dobj-11-w59s2397" + }, + { + "output" : "smart_gff", + "data_object_type" : "SMART Annotation GFF", + "description" : "SMART Annotations for {id}", + "name" : "GFF3 format file with SMART", + "id" : "nmdc:dobj-11-9d1sb298" + }, + { + "output" : "supfam_gff", + "data_object_type" : "SUPERFam Annotation GFF", + "description" : "SUPERFam Annotations for {id}", + "name" : "GFF3 format file with SUPERFam", + "id" : "nmdc:dobj-11-rsffjr58" + }, + { + "output" : "cath_funfam_gff", + "data_object_type" : "CATH FunFams (Functional Families) Annotation GFF", + "description" : "CATH FunFams for {id}", + "name" : "GFF3 format file with CATH FunFams", + "id" : "nmdc:dobj-11-7kp0zp78" + }, + { + "output" : "crt_gff", + "data_object_type" : "CRT Annotation GFF", + "description" : "CRT Annotations for {id}", + "name" : "GFF3 format file with CRT", + "id" : "nmdc:dobj-11-xq6ywj49" + }, + { + "output" : "genemark_gff", + "data_object_type" : "Genemark Annotation GFF", + "description" : "Genemark Annotations for {id}", + "name" : "GFF3 format file with Genemark", + "id" : "nmdc:dobj-11-2fm8f997" + }, + { + "output" : "prodigal_gff", + 
"data_object_type" : "Prodigal Annotation GFF", + "description" : "Prodigal Annotations {id}", + "name" : "GFF3 format file with Prodigal", + "id" : "nmdc:dobj-11-8tjfg176" + }, + { + "output" : "trna_gff", + "data_object_type" : "TRNA Annotation GFF", + "description" : "TRNA Annotations {id}", + "name" : "GFF3 format file with TRNA", + "id" : "nmdc:dobj-11-zwd23452" + }, + { + "output" : "final_rfam_gff", + "data_object_type" : "RFAM Annotation GFF", + "description" : "RFAM Annotations for {id}", + "name" : "GFF3 format file with RFAM", + "id" : "nmdc:dobj-11-m8wsj755" + }, + { + "output" : "ko_ec_gff", + "data_object_type" : "KO_EC Annotation GFF", + "description" : "KO_EC Annotations for {id}", + "name" : "GFF3 format file with KO_EC", + "id" : "nmdc:dobj-11-4mqqx605" + }, + { + "output" : "product_names_tsv", + "data_object_type" : "Product Names", + "description" : "Product names for {id}", + "name" : "Product names file", + "id" : "nmdc:dobj-11-wvhhv202" + }, + { + "output" : "gene_phylogeny_tsv", + "data_object_type" : "Gene Phylogeny tsv", + "description" : "Gene Phylogeny for {id}", + "name" : "Gene Phylogeny file", + "id" : "nmdc:dobj-11-k3q61q69" + }, + { + "output" : "crt_crisprs", + "data_object_type" : "Crispr Terms", + "description" : "Crispr Terms for {id}", + "name" : "Crispr Terms", + "id" : "nmdc:dobj-11-nhgfya98" + }, + { + "output" : "stats_tsv", + "data_object_type" : "Annotation Statistics", + "description" : "Annotation Stats for {id}", + "name" : "Annotation statistics report", + "id" : "nmdc:dobj-11-h80q9f77" + }, + { + "output" : "renamed_fasta", + "name" : "Renamed assembly contigs fasta", + "data_object_type" : "Assembly Contigs", + "description" : "Assembly contigs (remapped) for {id}", + "id" : "nmdc:dobj-11-hdbvxx08" + }, + { + "output" : "map_file", + "data_object_type" : "Contig Mapping File", + "description" : "Contig mappings file for {id}", + "name" : "Contig mappings between contigs and scaffolds", + "suffix" : 
"_contig_names_mapping.tsv", + "optional" : true, + "id" : "nmdc:dobj-11-65qdwa51" + }, + { + "output" : "imgap_version", + "data_object_type" : "Annotation Info File", + "description" : "Annotation info for {id}", + "name" : "File containing annotation info", + "id" : "nmdc:dobj-11-ps99c760" + } + ] + }, + "claims" : [ + { + "op_id" : "nmdc:sys0hsm3xt04", + "site_id" : "NERSC" + } + ] +} + +] diff --git a/tests/fixtures/metagenome_annotation_activity_set.json b/tests/fixtures/nmdc_db/metagenome_annotation.json similarity index 87% rename from tests/fixtures/metagenome_annotation_activity_set.json rename to tests/fixtures/nmdc_db/metagenome_annotation.json index f6c80f78..03184cc6 100644 --- a/tests/fixtures/metagenome_annotation_activity_set.json +++ b/tests/fixtures/nmdc_db/metagenome_annotation.json @@ -3,11 +3,8 @@ "has_input": [ "nmdc:dobj-11-contigs" ], - "part_of": [ - "nmdc:mga0vx38" - ], "git_url": "https://github.com/microbiomedata/mg_annotation", - "version": "1.0.3", + "version": "1.1.0", "has_output": [ "nmdc:dobj-11-proteinsfaa", "nmdc:dobj-11-structural", @@ -37,9 +34,9 @@ "was_informed_by": "nmdc:omprc-11-metag1", "id": "nmdc:wfmgan-11-test001.1", "execution_resource": "NERSC-Cori", - "name": "Annotation Activity for nmdc:mga0vx38", + "name": "Annotation Activity for nmdc:omprc-11-metag1", "started_at_time": "2021-08-05T14:48:51+00:00", - "type": "nmdc:MetagenomeAnnotationActivity", + "type": "nmdc:MetagenomeAnnotation", "ended_at_time": "2021-09-15T10:13:20+00:00" } ] diff --git a/tests/fixtures/nmdc_db/metagenome_assembly.json b/tests/fixtures/nmdc_db/metagenome_assembly.json new file mode 100644 index 00000000..1b15831d --- /dev/null +++ b/tests/fixtures/nmdc_db/metagenome_assembly.json @@ -0,0 +1,23 @@ +[ + { + "has_input": [ + "nmdc:dobj-11-filteredreads" + ], + "git_url": "https://github.com/microbiomedata/metaAssembly", + "version": "v1.0.3", + "has_output": [ + "nmdc:dobj-11-contigs", + "nmdc:dobj-11-coverstats", + "nmdc:dobj-11-coverbam", + 
"nmdc:dobj-11-assemblyagp", + "nmdc:dobj-11-scaffolds" + ], + "was_informed_by": "nmdc:omprc-11-metag1", + "id": "nmdc:wfmgas-11-test001.1", + "execution_resource": "NERSC-Cori", + "name": "Assembly Activity for nmdc:mga0vx38", + "started_at_time": "2021-08-05T14:48:51+00:00", + "type": "nmdc:MetagenomeAssembly", + "ended_at_time": "2021-09-15T10:13:20+00:00" + } +] diff --git a/tests/fixtures/metatranscriptome_annotation_set.json b/tests/fixtures/nmdc_db/metatranscriptome_annotation.json similarity index 92% rename from tests/fixtures/metatranscriptome_annotation_set.json rename to tests/fixtures/nmdc_db/metatranscriptome_annotation.json index 4488d185..11d09296 100644 --- a/tests/fixtures/metatranscriptome_annotation_set.json +++ b/tests/fixtures/nmdc_db/metatranscriptome_annotation.json @@ -3,9 +3,6 @@ "has_input": [ "nmdc:dobj-11-contigst1" ], - "part_of": [ - "nmdc:omprc-11-metat1" - ], "git_url": "https://github.com/microbiomedata/mg_annotation", "version": "v1.1.0", "has_output": [ @@ -39,7 +36,7 @@ "execution_resource": "NERSC-Cori", "name": "Annotation Activity for nmdc:omprc-11-metat1", "started_at_time": "2021-08-05T14:48:51+00:00", - "type": "nmdc:MetatranscriptomeAnnotationActivity", + "type": "nmdc:MetatranscriptomeAnnotation", "ended_at_time": "2021-09-15T10:13:20+00:00" } ] \ No newline at end of file diff --git a/tests/fixtures/nmdc_db/metatranscriptome_assembly.json b/tests/fixtures/nmdc_db/metatranscriptome_assembly.json new file mode 100644 index 00000000..71c07c16 --- /dev/null +++ b/tests/fixtures/nmdc_db/metatranscriptome_assembly.json @@ -0,0 +1,23 @@ +[ + { + "id": "nmdc:wfmtas-11-test001.1", + "name": "Assembly Activity for metaT", + "has_input": [ + "nmdc:dobj-11-filteredreads1" + ], + "git_url": "https://github.com/microbiomedata/metaT_Assembly", + "version": "v0.0.1", + "execution_resource": "NERSC-Cori", + "was_informed_by": "nmdc:omprc-11-metat1", + "has_output": [ + "nmdc:dobj-11-contigst1", + "nmdc:dobj-11-coverstatst1", + 
"nmdc:dobj-11-coverbamt1", + "nmdc:dobj-11-assemblyinfot1", + "nmdc:dobj-11-bamidxt1" + ], + "started_at_time": "2021-08-05T14:48:51+00:00", + "type": "nmdc:MetatranscriptomeAssembly", + "ended_at_time": "2021-09-15T10:13:20+00:00" + } +] \ No newline at end of file diff --git a/tests/fixtures/nmdc_db/read_qc_analysis.json b/tests/fixtures/nmdc_db/read_qc_analysis.json new file mode 100644 index 00000000..2f889b3f --- /dev/null +++ b/tests/fixtures/nmdc_db/read_qc_analysis.json @@ -0,0 +1,43 @@ +[ + { + "id": "nmdc:wfrqc-11-testmetag1.1", + "name": "Read QC Activity for nmdc:wfrqc-11-metag1.1", + "started_at_time": "2024-04-11T20:51:15.535533+00:00", + "ended_at_time": "2024-04-11T23:39:28.659534+00:00", + "was_informed_by": "nmdc:omprc-11-metag1", + "execution_resource": "NERSC-Perlmutter", + "git_url": "https://github.com/microbiomedata/ReadsQC", + "has_input": [ + "nmdc:dobj-11-rawreads1", + "nmdc:dobj-11-rawreads2" + ], + "type": "nmdc:ReadQcAnalysis", + "has_output": [ + "nmdc:dobj-11-qcstats", + "nmdc:dobj-11-qcinfo", + "nmdc:dobj-11-filteredreads" + ], + "version": "v1.0.8" + }, + { + "id": "nmdc:wfrqc-11-testmetat1.1", + "name": "Read QC Activity for nmdc:wfrqc-11-metat1.1", + "started_at_time": "2024-04-11T20:51:15.535533+00:00", + "ended_at_time": "2024-04-11T23:39:28.659534+00:00", + "was_informed_by": "nmdc:omprc-11-metat1", + "execution_resource": "NERSC-Perlmutter", + "git_url": "https://github.com/microbiomedata/metaT_ReadsQC", + "has_input": [ + "nmdc:dobj-11-rawreads3", + "nmdc:dobj-11-rawreads4" + ], + "type": "nmdc:ReadQcAnalysis", + "has_output": [ + "nmdc:dobj-11-qcstats1", + "nmdc:dobj-11-qcinfo1", + "nmdc:dobj-11-filteredreads1", + "nmdc:dobj-11-rrnafastq" + ], + "version": "v0.0.7" + } +] diff --git a/tests/fixtures/read_qc_analysis_activity_set.json b/tests/fixtures/nmdc_db/workflow_execution_set.json similarity index 86% rename from tests/fixtures/read_qc_analysis_activity_set.json rename to 
tests/fixtures/nmdc_db/workflow_execution_set.json index 282c0fab..b86fdd31 100644 --- a/tests/fixtures/read_qc_analysis_activity_set.json +++ b/tests/fixtures/nmdc_db/workflow_execution_set.json @@ -11,15 +11,12 @@ "nmdc:dobj-11-rawreads1", "nmdc:dobj-11-rawreads2" ], - "type": "nmdc:ReadQcAnalysisActivity", + "type": "nmdc:ReadQcAnalysis", "has_output": [ "nmdc:dobj-11-qcstats", "nmdc:dobj-11-qcinfo", "nmdc:dobj-11-filteredreads" ], - "part_of": [ - "nmdc:omprc-11-metag1" - ], "version": "v1.0.8" }, { @@ -34,16 +31,13 @@ "nmdc:dobj-11-rawreads3", "nmdc:dobj-11-rawreads4" ], - "type": "nmdc:ReadQcAnalysisActivity", + "type": "nmdc:ReadQcAnalysis", "has_output": [ "nmdc:dobj-11-qcstats1", "nmdc:dobj-11-qcinfo1", "nmdc:dobj-11-filteredreads1", "nmdc:dobj-11-rrnafastq" ], - "part_of": [ - "nmdc:omprc-11-metat1" - ], "version": "v1.0.8" } ] diff --git a/tests/fixtures/read_qc_analysis_activity_set2.json b/tests/fixtures/read_qc_analysis_activity_set2.json deleted file mode 100644 index 72a16e55..00000000 --- a/tests/fixtures/read_qc_analysis_activity_set2.json +++ /dev/null @@ -1,26 +0,0 @@ -[ -{ - "id" : "nmdc:wfrqc-11-testmetag12", - "name" : "Read QC Activity for nmdc:wfrqc-11-metag1.2", - "started_at_time" : "2024-04-11T20:51:15.535533+00:00", - "ended_at_time" : "2024-04-11T23:39:28.659534+00:00", - "was_informed_by" : "nmdc:omprc-11-metag1", - "execution_resource" : "NERSC-Perlmutter", - "git_url" : "https://github.com/microbiomedata/ReadsQC", - "has_input" : [ - "nmdc:dobj-11-rawreads1", - "nmdc:dobj-11-rawreads2" - ], - "type" : "nmdc:ReadQcAnalysisActivity", - "has_output" : [ - "nmdc:dobj-11-qcstats2", - "nmdc:dobj-11-qcinfo2", - "nmdc:dobj-11-filteredreads2" - ], - "part_of" : [ - "nmdc:omprc-11-metag1" - ], - "version" : "v1.0.8" -} - -] diff --git a/tests/import_test.yaml b/tests/import_test.yaml index af06137f..dd478cda 100644 --- a/tests/import_test.yaml +++ b/tests/import_test.yaml @@ -1,49 +1,37 @@ +#TO DO delete this file in favor of using 
configs/import.yaml Workflows: - - Name: Sequencing - Import: true - Type: nmdc:MetagenomeSequencingActivity - Git_repo: https://github.com/microbiomedata/RawSequencingData - Version: v1.0.0 - Collection: metagenome_sequencing_activity_set - ActivityRange: MetagenomeSequencingActivity - Activity: - name: "Metagenome Sequencing Activity for {id}" - type: nmdc:MetagenomeSequencingActivity - Outputs: - - Metagenome Raw Reads - - Name: Reads QC Import: true - Type: nmdc:ReadQcAnalysisActivity + Type: nmdc:ReadQcAnalysis Git_repo: https://github.com/microbiomedata/ReadsQC Version: v1.0.8 - Collection: read_qc_analysis_activity_set - ActivityRange: ReadQcAnalysisActivity + Collection: workflow_execution_set + WorkflowExecutionRange: ReadQcAnalysis Inputs: - Metagenome Raw Reads - Activity: - name: "Read QC Activity for {id}" + Workflow_Execution: + name: "Read QC for {id}" input_read_bases: "{outputs.stats.input_read_bases}" input_read_count: "{outputs.stats.input_read_count}" output_read_bases: "{outputs.stats.output_read_bases}" output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity + type: nmdc:ReadQcAnalysis Outputs: - Filtered Sequencing Reads - QC Statistics - Name: Readbased Taxonomy Import: false - Type: nmdc:ReadBasedTaxonomyAnalysisActivity + Type: nmdc:ReadBasedTaxonomyAnalysis Git_repo: https://github.com/microbiomedata/ReadsQC Version: v1.0.5 - Collection: read_based_taxonomy_analysis_activity_set - ActivityRange: ReadBasedTaxonomyAnalysisActivity + Collection: workflow_execution_set + WorkflowExecutionRange: ReadBasedTaxonomyAnalysis Inputs: - Filtered Sequencing Reads - Activity: - name: Readbased Taxonomy Analysis Activity for {id} - type: nmdc:ReadBasedTaxonomyAnalysisActivity + Workflow_Execution: + name: Readbased Taxonomy Analysis for {id} + type: nmdc:ReadBasedTaxonomyAnalysis Outputs: - GOTTCHA2 Classification Report - GOTTCHA2 Report Full @@ -60,12 +48,12 @@ Workflows: Type: nmdc:MetagenomeAssembly Git_repo: 
https://github.com/microbiomedata/metaAssembly Version: v1.0.3 - Collection: metagenome_assembly_set - ActivityRange: MetagenomeAssembly + Collection: workflow_execution_set + WorkflowExecutionRange: MetagenomeAssembly Inputs: - Filtered Sequencing Reads - Activity: - name: "Metagenome Assembly Activity for {id}" + Workflow_Execution: + name: "Metagenome Assembly for {id}" type: nmdc:MetagenomeAssembly asm_score: "{outputs.stats.asm_score}" contig_bp: "{outputs.stats.contig_bp}" @@ -101,16 +89,16 @@ Workflows: - Name: Metagenome Annotation Import: true - Type: nmdc:MetagenomeAnnotationActivity + Type: nmdc:MetagenomeAnnotation Git_repo: https://github.com/microbiomedata/mg_annotation Version: v1.0.4 - Collection: metagenome_annotation_activity_set - ActivityRange: MetagenomeAnnotationActivity + Collection: workflow_execution_set + WorkflowExecutionRange: MetagenomeAnnotation Inputs: - Assembly Contigs - Activity: - name: "Metagenome Annotation Analysis Activity for {id}" - type: nmdc:MetagenomeAnnotationActivity + Workflow_Execution: + name: "Metagenome Annotation Analysis for {id}" + type: nmdc:MetagenomeAnnotation Outputs: - Annotation Amino Acid FASTA - Structural Annotation GFF @@ -137,11 +125,11 @@ Workflows: - Name: MAGs Import: true - Type: nmdc:MagsAnalysisActivity + Type: nmdc:MagsAnalysis Git_repo: https://github.com/microbiomedata/metaMAGs Version: v1.0.6 - Collection: mags_activity_set - ActivityRange: MagsAnalysisActivity + Collection: workflow_execution_set + WorkflowExecutionRange: MagsAnalysis Inputs: - Assembly Contigs - Functional Annotation GFF @@ -157,9 +145,9 @@ Workflows: - SMART Annotation GFF - Annotation Amino Acid FASTA - Gene Phylogeny tsv - Activity: - name: "Metagenome Assembled Genomes Analysis Activity for {id}" - type: nmdc:MagsAnalysisActivity + Workflow_Execution: + name: "Metagenome Assembled Genomes Analysis for {id}" + type: nmdc:MagsAnalysis Outputs: - CheckM Statistics - Metagenome Bins @@ -173,8 +161,8 @@ Data Objects: name: 
Raw sequencer read data import_suffix: .[A-Z]+-[A-Z]+.fastq.gz nmdc_suffix: .fastq.gz - input_to: [nmdc:ReadQcAnalysisActivity] - output_of: nmdc:MetagenomeSequencingActivity + input_to: [nmdc:ReadQcAnalysis] + output_of: nmdc:NucleotideSequencing mulitple: false action: none - data_object_type: CheckM Statistics @@ -183,7 +171,7 @@ Data Objects: import_suffix: _checkm_qa.out nmdc_suffix: _checkm_qa.out input_to: [] - output_of: nmdc:MagsAnalysisActivity + output_of: nmdc:MagsAnalysis mulitple: false action: rename - data_object_type: GTDBTK Bacterial Summary @@ -192,7 +180,7 @@ Data Objects: import_suffix: _gtdbtk.bac122.summary.tsv nmdc_suffix: _gtdbtk.bac122.summary.tsv input_to: [] - output_of: nmdc:MagsAnalysisActivity + output_of: nmdc:MagsAnalysis mulitple: false action: rename - data_object_type: GTDBTK Archaeal Summary @@ -201,7 +189,7 @@ Data Objects: import_suffix: _gtdbtk.ar122.summary.tsv nmdc_suffix: _gtdbtk.ar122.summary.tsv input_to: [] - output_of: nmdc:MagsAnalysisActivity + output_of: nmdc:MagsAnalysis mulitple: false action: rename - data_object_type: Annotation Amino Acid FASTA @@ -209,8 +197,8 @@ Data Objects: name: FASTA amino acid file for annotated proteins import_suffix: _proteins.faa nmdc_suffix: _proteins.faa - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Contig Mapping File @@ -219,7 +207,7 @@ Data Objects: import_suffix: _contig_names_mapping.tsv nmdc_suffix: _contig_names_mapping.tsv input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Structural Annotation GFF @@ -228,7 +216,7 @@ Data Objects: import_suffix: _structural_annotation.gff nmdc_suffix: _structural_annotation.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation 
mulitple: false action: rename - data_object_type: Functional Annotation GFF @@ -236,8 +224,8 @@ Data Objects: name: GFF3 format file with functional annotations import_suffix: _functional_annotation.gff nmdc_suffix: _functional_annotation.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Annotation KEGG Orthology @@ -245,8 +233,8 @@ Data Objects: name: Tab delimited file for KO annotation import_suffix: _ko.tsv nmdc_suffix: _ko.tsv - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Annotation Enzyme Commission @@ -254,8 +242,8 @@ Data Objects: name: Tab delimited file for EC annotation import_suffix: _ec.tsv nmdc_suffix: _ec.tsv - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Clusters of Orthologous Groups (COG) Annotation GFF @@ -263,8 +251,8 @@ Data Objects: name: GFF3 format file with COGs import_suffix: _cog.gff nmdc_suffix: _cog.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Pfam Annotation GFF @@ -272,8 +260,8 @@ Data Objects: name: GFF3 format file with Pfam import_suffix: _pfam.gff nmdc_suffix: _pfam.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: TIGRFam Annotation GFF @@ -281,8 +269,8 @@ Data Objects: name: GFF3 format file with TIGRfam import_suffix: 
_tigrfam.gff nmdc_suffix: _tigrfam.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: SMART Annotation GFF @@ -290,8 +278,8 @@ Data Objects: name: GFF3 format file with SMART import_suffix: _smart.gff nmdc_suffix: _smart.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: SUPERFam Annotation GFF @@ -299,8 +287,8 @@ Data Objects: name: GFF3 format file with SUPERFam import_suffix: _supfam.gff nmdc_suffix: _supfam.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: CATH FunFams (Functional Families) Annotation GFF @@ -308,8 +296,8 @@ Data Objects: name: GFF3 format file with CATH FunFams import_suffix: _cath_funfam.gff nmdc_suffix: _cath_funfam.gff - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: CRT Annotation GFF @@ -318,7 +306,7 @@ Data Objects: import_suffix: _crt.gff nmdc_suffix: _crt.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Genemark Annotation GFF @@ -327,7 +315,7 @@ Data Objects: import_suffix: _genemark.gff nmdc_suffix: _genemark.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Prodigal Annotation GFF @@ -336,7 +324,7 @@ Data Objects: import_suffix: _prodigal.gff nmdc_suffix: _prodigal.gff input_to: [] - 
output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: TRNA Annotation GFF @@ -345,7 +333,7 @@ Data Objects: import_suffix: _trna.gff nmdc_suffix: _trna.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: RFAM Annotation GFF @@ -354,7 +342,7 @@ Data Objects: import_suffix: _rfam.gff nmdc_suffix: _rfam.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: KO_EC Annotation GFF @@ -363,7 +351,7 @@ Data Objects: import_suffix: _ko_ec.gff nmdc_suffix: _ko_ec.gff input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Product Names @@ -371,8 +359,8 @@ Data Objects: name: Product names file import_suffix: _product_names.tsv nmdc_suffix: _product_names.tsv - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Gene Phylogeny tsv @@ -380,8 +368,8 @@ Data Objects: name: Gene Phylogeny file import_suffix: _gene_phylogeny.tsv nmdc_suffix: _gene_phylogeny.tsv - input_to: [nmdc:MagsAnalysisActivity] - output_of: nmdc:MetagenomeAnnotationActivity + input_to: [nmdc:MagsAnalysis] + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Crispr Terms @@ -390,7 +378,7 @@ Data Objects: import_suffix: _crt.crisprs nmdc_suffix: _crt.crisprs input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Annotation Statistics @@ -399,7 +387,7 @@ Data Objects: import_suffix: _stats.tsv nmdc_suffix: _stats.tsv input_to: [] - output_of: 
nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Annotation Info File @@ -408,7 +396,7 @@ Data Objects: import_suffix: _imgap.info nmdc_suffix: _imgap.info input_to: [] - output_of: nmdc:MetagenomeAnnotationActivity + output_of: nmdc:MetagenomeAnnotation mulitple: false action: rename - data_object_type: Filtered Sequencing Reads @@ -416,8 +404,8 @@ Data Objects: name: Reads QC result fastq (clean data) import_suffix: filter-METAGENOME.fastq.gz nmdc_suffix: _filtered.fastq.gz - input_to: [nmdc:ReadBasedTaxonomyAnalysisActivity,nmdc:MetagenomeAssembly] - output_of: nmdc:ReadQcAnalysisActivity + input_to: [nmdc:ReadBasedTaxonomyAnalysis,nmdc:MetagenomeAssembly] + output_of: nmdc:ReadQcAnalysis mulitple: false action: rename - data_object_type: QC Statistics @@ -426,7 +414,7 @@ Data Objects: import_suffix: .filtered-report.txt nmdc_suffix: _filterStats.txt input_to: [] - output_of: nmdc:ReadQcAnalysisActivity + output_of: nmdc:ReadQcAnalysis mulitple: false action: rename - data_object_type: Read Filtering Info File @@ -435,7 +423,7 @@ Data Objects: import_suffix: _readsQC.info nmdc_suffix: _readsQC.info input_to: [] - output_of: nmdc:ReadQcAnalysisActivity + output_of: nmdc:ReadQcAnalysis mulitple: false action: rename - data_object_type: Assembly Contigs @@ -443,7 +431,7 @@ Data Objects: name: Final assembly contigs fasta import_suffix: assembly.contigs.fasta nmdc_suffix: _contigs.fna - input_to: [nmdc:MetagenomeAnnotationActivity,nmdc:MagsAnalysisActivity] + input_to: [nmdc:MetagenomeAnnotation,nmdc:MagsAnalysis] output_of: nmdc:MetagenomeAssembly mulitple: false action: rename @@ -488,7 +476,7 @@ Data Objects: name: Sorted bam file of reads mapping back to the final assembly import_suffix: pairedMapped.sam.gz nmdc_suffix: _pairedMapped_sorted.sam.gz - input_to: [nmdc:MagsAnalysisActivity] + input_to: [nmdc:MagsAnalysis] output_of: nmdc:MetagenomeAssembly mulitple: false action: rename @@ 
-498,7 +486,7 @@ Data Objects: import_suffix: _gottcha2_full.tsv nmdc_suffix: _gottcha2_full.tsv input_to: [] - output_of: nmdc:ReadBasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: GOTTCHA2 Classification Report @@ -507,7 +495,7 @@ Data Objects: import_suffix: _gottcha2_classification.tsv nmdc_suffix: _gottcha2_classification.tsv input_to: [] - output_of: nmdc:ReadBasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: GOTTCHA2 Krona Plot @@ -516,7 +504,7 @@ Data Objects: import_suffix: _gottcha2_krona.html nmdc_suffix: _gottcha2_krona.html input_to: [] - output_of: nmdc:ReadBasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Centrifuge Taxonomic Classification @@ -525,7 +513,7 @@ Data Objects: import_suffix: _centrifuge_classification.tsv nmdc_suffix: _centrifuge_classification.tsv input_to: [] - output_of: nmdc:ReadBasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Centrifuge output report file @@ -534,7 +522,7 @@ Data Objects: import_suffix: _centrifuge_report.tsv nmdc_suffix: _centrifuge_report.tsv input_to: [] - output_of: nmdc:ReadbasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Centrifuge Krona Plot @@ -543,7 +531,7 @@ Data Objects: import_suffix: _centrifuge_krona.html nmdc_suffix: _centrifuge_krona.html input_to: [] - output_of: nmdc:ReadbasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Kraken2 Classification Report @@ -552,7 +540,7 @@ Data Objects: import_suffix: _kraken2_report.tsv nmdc_suffix: _kraken2_report.tsv input_to: [] - output_of: nmdc:ReadbasedTaxonomyAnalysisActivity + output_of: 
nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Kraken2 Taxonomic Classification @@ -561,7 +549,7 @@ Data Objects: import_suffix: _kraken2_classification.tsv nmdc_suffix: _kraken2_classification.tsv input_to: [] - output_of: nmdc:ReadbasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename - data_object_type: Kraken2 Krona Plot @@ -570,7 +558,7 @@ Data Objects: import_suffix: _kraken2_krona.html nmdc_suffix: _kraken2_krona.html input_to: [] - output_of: nmdc:ReadbasedTaxonomyAnalysisActivity + output_of: nmdc:ReadBasedTaxonomyAnalysis mulitple: false action: rename Multiples: @@ -580,7 +568,7 @@ Data Objects: import_suffix: _[0-9]+.tar.gz nmdc_suffix: _hqmq_bin.zip input_to: [] - output_of: nmdc:MagsAnalysisActivity + output_of: nmdc:MagsAnalysis mulitple: true action: zip diff --git a/tests/site_configuration_test.toml b/tests/site_configuration_test.toml index 93b58284..06173330 100644 --- a/tests/site_configuration_test.toml +++ b/tests/site_configuration_test.toml @@ -9,7 +9,7 @@ data_dir = "/tmp" raw_dir = "/path/to/raw/data/files" [site] -resource = "Resource Name" +resource = "NERSC-Perlmutter" site = "Processing Site" [nmdc] @@ -18,7 +18,7 @@ api_url = "http://localhost" [state] watch_state = "State File" -agent_state = "/tmp/agent.state" +#agent_state = "/tmp/agent.state" Commenting this out will default to _state/agent.state activity_id_state = "/Path/to/activity_id_state" [workflows] diff --git a/tests/test_config.py b/tests/test_config.py index 61748f32..0e701d4c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,11 +1,11 @@ import pytest -from nmdc_automation.config.config import Config +from nmdc_automation.config.siteconfig import SiteConfig def test_config(monkeypatch, test_data_dir, base_test_dir): monkeypatch.setenv("WF_CONFIG_FILE", str(test_data_dir / "wf_config")) - conf = Config(base_test_dir / "site_configuration_test.toml") + conf = 
SiteConfig(base_test_dir / "site_configuration_test.toml") assert conf.cromwell_api assert conf.cromwell_url assert conf.stage_dir @@ -17,7 +17,7 @@ def test_config(monkeypatch, test_data_dir, base_test_dir): assert conf.url_root assert conf.api_url assert conf.watch_state - assert conf.agent_state + assert conf.agent_state is None # not in test config assert conf.activity_id_state assert conf.workflows_config assert conf.client_id @@ -28,4 +28,4 @@ def test_config(monkeypatch, test_data_dir, base_test_dir): def test_config_missing(monkeypatch): monkeypatch.setenv("WF_CONFIG_FILE", "/bogus") with pytest.raises(OSError): - Config("/tmp/foo") + SiteConfig("/tmp/foo") diff --git a/tests/test_data/data_generation_set.json b/tests/test_data/data_generation_set.json new file mode 100644 index 00000000..24d2c7c8 --- /dev/null +++ b/tests/test_data/data_generation_set.json @@ -0,0 +1,28 @@ +[ + { + "id": "nmdc:omprc-11-nhy4pz43", + "name": "Core terrestrial soil microbial communities from Talladega National Forest, Ozarks Complex, AL, USA - TALL_002-O-10-34-20140708-GEN-DNA1", + "has_input": [ + "nmdc:bsm-11-7qhhd037" + ], + "has_output": [ + "nmdc:22afa3d49b73eaec2e9787a6b88fbdc3" + ], + "add_date": "2020-01-27T00:00:00", + "mod_date": "2020-01-27T00:00:00", + "ncbi_project_name": "Core terrestrial soil microbial communities from Talladega National Forest, Ozarks Complex, AL, USA - TALL_002-O-10-34-20140708-GEN-DNA1", + "analyte_category": "metagenome", + "associated_studies": [ + "nmdc:sty-11-34xj1150" + ], + "principal_investigator": { + "has_raw_value": "Lee Stanish", + "email": "lstanish@gmail.com", + "name": "Lee Stanish" + }, + "type": "nmdc:NucleotideSequencing", + "gold_sequencing_project_identifiers": [ + "GOLD:Gp0477109" + ] + } +] diff --git a/tests/test_data/mags_activity_set.json b/tests/test_data/mags_activity_set.json index 3206a520..3d99cd5e 100644 --- a/tests/test_data/mags_activity_set.json +++ b/tests/test_data/mags_activity_set.json @@ -5,9 +5,6 @@ 
"nmdc:da186671c5a581af0a7d195bd857c871", "nmdc:4d41794368ed796500bb6d2c82a6787a" ], - "part_of": [ - "nmdc:mga0vx38" - ], "git_url": "https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "has_output": [ "nmdc:d41d8cd98f00b204e9800998ecf8427e", @@ -22,7 +19,7 @@ "execution_resource": "NERSC-Cori", "name": "MAGs Analysis Activity for nmdc:mga0vx38", "started_at_time": "2021-08-05T14:48:51+00:00", - "type": "nmdc:MAGsAnalysisActivity", + "type": "nmdc:MAGsAnalysis", "ended_at_time": "2021-09-15T10:13:20+00:00" } ] diff --git a/tests/test_data/metagenome_annotation_activity_set.json b/tests/test_data/metagenome_annotation_activity_set.json index 44155700..0a5df070 100644 --- a/tests/test_data/metagenome_annotation_activity_set.json +++ b/tests/test_data/metagenome_annotation_activity_set.json @@ -3,9 +3,6 @@ "has_input": [ "nmdc:37573bca240f88091720ae61ae5c9452" ], - "part_of": [ - "nmdc:mga0vx38" - ], "git_url": "https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "has_output": [ "nmdc:7336ecf1f0b47e6161b52aec01d56ab8", @@ -34,7 +31,7 @@ "execution_resource": "NERSC-Cori", "name": "Annotation Activity for nmdc:mga0vx38", "started_at_time": "2021-08-05T14:48:51+00:00", - "type": "nmdc:MetagenomeAnnotationActivity", + "type": "nmdc:MetagenomeAnnotation", "ended_at_time": "2021-09-15T10:13:20+00:00" } ] diff --git a/tests/test_data/metagenome_assembly_set.json b/tests/test_data/metagenome_assembly_set.json index 8f420f90..7a99f1e7 100644 --- a/tests/test_data/metagenome_assembly_set.json +++ b/tests/test_data/metagenome_assembly_set.json @@ -3,9 +3,6 @@ "has_input": [ "nmdc:f107af0a000ec0b90e157fc09473c337" ], - "part_of": [ - "nmdc:mga0vx38" - ], "scaf_N50": 302542, "ctg_logsum": 5909271, "ctg_N90": 1315235, diff --git a/tests/test_data/metagenome_sequencing_activity_set.json b/tests/test_data/metagenome_sequencing_activity_set.json index d00879eb..c1be25df 100644 --- a/tests/test_data/metagenome_sequencing_activity_set.json +++ 
b/tests/test_data/metagenome_sequencing_activity_set.json @@ -1,9 +1,6 @@ [ { "has_input": [ ], - "part_of": [ - "nmdc:mga0vx38" - ], "git_url": "https://github.com/microbiomedata/RawSequencingData", "version": "v1.0.0", "has_output": [ diff --git a/tests/test_data/omics_processing_set.json b/tests/test_data/omics_processing_set.json index 9e0943a8..24d2c7c8 100644 --- a/tests/test_data/omics_processing_set.json +++ b/tests/test_data/omics_processing_set.json @@ -10,12 +10,9 @@ ], "add_date": "2020-01-27T00:00:00", "mod_date": "2020-01-27T00:00:00", - "instrument_name": "Illumina HiSeq", "ncbi_project_name": "Core terrestrial soil microbial communities from Talladega National Forest, Ozarks Complex, AL, USA - TALL_002-O-10-34-20140708-GEN-DNA1", - "omics_type": { - "has_raw_value": "Metagenome" - }, - "part_of": [ + "analyte_category": "metagenome", + "associated_studies": [ "nmdc:sty-11-34xj1150" ], "principal_investigator": { @@ -23,7 +20,7 @@ "email": "lstanish@gmail.com", "name": "Lee Stanish" }, - "type": "nmdc:OmicsProcessing", + "type": "nmdc:NucleotideSequencing", "gold_sequencing_project_identifiers": [ "GOLD:Gp0477109" ] diff --git a/tests/test_data/read_qc_analysis_activity_set.json b/tests/test_data/read_qc_analysis_activity_set.json index 9d6740e8..90d72c39 100644 --- a/tests/test_data/read_qc_analysis_activity_set.json +++ b/tests/test_data/read_qc_analysis_activity_set.json @@ -3,9 +3,6 @@ "has_input": [ "nmdc:22afa3d49b73eaec2e9787a6b88fbdc3" ], - "part_of": [ - "nmdc:mga0vx38" - ], "git_url": "https://github.com/microbiomedata/ReadsQC", "version": "v1.0.7", "has_output": [ @@ -21,7 +18,7 @@ "name": "Read QC Activity for nmdc:mga0vx38", "output_read_count": 101660590, "started_at_time": "2021-08-05T14:48:51+00:00", - "type": "nmdc:ReadQCAnalysisActivity", + "type": "nmdc:ReadQCAnalysis", "ended_at_time": "2021-09-15T10:13:20+00:00" } ] diff --git a/tests/test_data/read_qc_analysis_activity_set2.json 
b/tests/test_data/read_qc_analysis_activity_set2.json index ce9c618d..4f7a5495 100644 --- a/tests/test_data/read_qc_analysis_activity_set2.json +++ b/tests/test_data/read_qc_analysis_activity_set2.json @@ -18,7 +18,7 @@ "name": "Read QC Activity for nmdc:mga0vx38", "output_read_count": 101660590, "started_at_time": "2021-08-05T14:48:51+00:00", - "type": "nmdc:ReadQCAnalysisActivity", + "type": "nmdc:ReadQCAnalysis", "ended_at_time": "2021-09-15T10:13:20+00:00" } ] diff --git a/tests/test_data/rqc_response2.json b/tests/test_data/rqc_response2.json index 70d8b14c..ffa693fa 100644 --- a/tests/test_data/rqc_response2.json +++ b/tests/test_data/rqc_response2.json @@ -36,7 +36,7 @@ "input_read_count": "{outputs.stats.input_read_count}", "output_read_bases": "{outputs.stats.output_read_bases}", "output_read_count": "{outputs.stats.output_read_count}", - "type": "nmdc:ReadQcAnalysisActivity" + "type": "nmdc:ReadQcAnalysis" }, "outputs": [ { diff --git a/tests/test_imports.py b/tests/test_imports.py index 3ceb6a62..ccc0eb8e 100644 --- a/tests/test_imports.py +++ b/tests/test_imports.py @@ -5,8 +5,16 @@ from time import time + +#TODO NOW: update to use real import.yaml file. Unclear if this is the right thing to do based on how the tests are set up. +#ie we want to test that the binning packaging works even if we have this false by default for older projects where we want to annotate and re-bin. +#TODO NEXT: add test for import-mt.yaml similar to what is in test_workflow_process.py + @fixture def gold_mapper(mock_api, base_test_dir, test_data_dir): + """ + Base test function for code related to importing JGI records. 
+ """ yaml_file = base_test_dir / "import_test.yaml" test_files = [test_data_dir / "test_pfam.gff", test_data_dir / "test_cog.gff", @@ -14,34 +22,50 @@ def gold_mapper(mock_api, base_test_dir, test_data_dir): test_data_dir / "test_72.tar.gz"] # proj_dir = os.path.abspath("./test_data") site_conf = base_test_dir / "site_configuration_test.toml" - omics_id = "nmdc:omprc-11-importT" - root_dir = f"/tmp/{omics_id}" + nucleotide_sequencing_id = "nmdc:omprc-11-importT" + root_dir = f"/tmp/{nucleotide_sequencing_id}" if os.path.exists(root_dir): shutil.rmtree(root_dir) - gm = GoldMapper("1", test_files, omics_id, yaml_file, test_data_dir, site_conf) + gm = GoldMapper("1", test_files, nucleotide_sequencing_id, yaml_file, test_data_dir, site_conf) gm.root_dir = root_dir return gm -def test_activity_mapper(gold_mapper): +def test_workflow_execution_mapper(gold_mapper): + """ + Test the creation of workflow execution records and data objects that are has_output of those workflow execution subclasses. + """ gold_mapper.unique_object_mapper() gold_mapper.multiple_objects_mapper() - gold_mapper.activity_mapper() + gold_mapper.workflow_execution_mapper() gold_mapper.post_nmdc_database_object() db = gold_mapper.get_database_object_dump() - assert len(db.metagenome_annotation_activity_set) == 1 - assert len(db.mags_activity_set) == 1 + #This should return 4 workflow_execution_set records becuase that is the number of records with Import:true in the config file + #note that if these records were tested against the actual schema they would fail b/c workflow executions can't have has_output be null. 
+ assert len(db.workflow_execution_set) == 4 + # gff files are 1:1 with data objects that are has_output of nmdc:MetagenomeAnnotation + # *tar.gz files should be combined into a single data object that is has_output of nmdc:MagsAnalysis assert len(db.data_object_set) == 3 def test_unique_object_mapper(gold_mapper): + """ + This test counts the number of files from gold_mapper where the data object creation should be 1:1. + """ gold_mapper.unique_object_mapper() assert len(gold_mapper.nmdc_db.data_object_set) == 2 assert len(gold_mapper.objects) == 2 def test_multiple_object_mapper(gold_mapper): + """ + This test counts the number of files from gold_mapper where the data object creation should be many:1. JGI stores each binning file + individually whereas NMDC combines all the records into a single tar.gz file. + """ gold_mapper.multiple_objects_mapper() # Add assertions to check if the method works as expected assert len(gold_mapper.nmdc_db.data_object_set) == 1 + print(gold_mapper.nmdc_db.data_object_set) assert len(gold_mapper.objects) == 1 + #check that the data object url gets made correctly for the multiple object mapper function. + assert "https://data.microbiomedata.org/data/nmdc:omprc-11-importT/nmdc:abcd.1/nmdc_abcd.1_hqmq_bin.zip" in (do["url"] for do in gold_mapper.nmdc_db.data_object_set) diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 00000000..1d9aa4ec --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,150 @@ +""" Test cases for the models module. """ +import json + +from bson import ObjectId +from pathlib import Path +from pytest import mark +from nmdc_automation.workflow_automation.models import( + DataObject, + Job, + JobOutput, + JobWorkflow, + WorkflowProcessNode, + workflow_process_factory, +) +from nmdc_automation.workflow_automation.workflows import load_workflow_configs +from tests.fixtures import db_utils + +def test_workflow_process_factory(fixtures_dir): + """ Test the workflow_process_factory function. 
""" + record_types = { + "nmdc:MagsAnalysis": "mags_analysis_record.json", + "nmdc:MetagenomeAnnotation": "metagenome_annotation_record.json", + "nmdc:MetagenomeAssembly": "metagenome_assembly_record.json", + "nmdc:MetatranscriptomeAnnotation": "metatranscriptome_annotation_record.json", + "nmdc:MetatranscriptomeAssembly": "metatranscriptome_assembly_record.json", + "nmdc:MetatranscriptomeExpressionAnalysis": "metatranscriptome_expression_analysis_record.json", + "nmdc:NucleotideSequencing": "nucleotide_sequencing_record.json", + "nmdc:ReadBasedTaxonomyAnalysis": "read_based_taxonomy_analysis_record.json", + "nmdc:ReadQcAnalysis": "read_qc_analysis_record.json", + } + for record_type, record_file in record_types.items(): + record = json.load(open(fixtures_dir / f"models/{record_file}")) + wfe = workflow_process_factory(record) + assert wfe.type == record_type + +def test_workflow_process_factory_mags_with_mags_list(fixtures_dir): + record = json.load(open(fixtures_dir / "models/mags_analysis_record.json")) + mga = workflow_process_factory(record) + assert mga.type == "nmdc:MagsAnalysis" + + +def test_process_factory_with_db_record(): + record = {'_id': ObjectId('66f4d5f10de8ad0b72100069'), 'id': 'nmdc:omprc-11-metag1', + 'name': 'Test Metagenome Processing', 'has_input': ['nmdc:bsm-11-qezc0h51'], + 'has_output': ['nmdc:dobj-11-rawreads1', 'nmdc:dobj-11-rawreads2'], 'analyte_category': 'metagenome', + 'associated_studies': ['nmdc:sty-11-test001'], "processing_institution": "JGI", + 'principal_investigator': {'has_raw_value': 'PI Name', 'email': 'pi_name@example.com', + 'name': 'PI Name', "type": "nmdc:PersonValue"}, + 'type': 'nmdc:NucleotideSequencing'} + wfe = workflow_process_factory(record) + assert wfe.type == "nmdc:NucleotideSequencing" + +@mark.parametrize("record_file, record_type", [ + ("mags_analysis_record.json", "nmdc:MagsAnalysis"), + ("metagenome_annotation_record.json", "nmdc:MetagenomeAnnotation"), + ("metagenome_assembly_record.json", 
"nmdc:MetagenomeAssembly"), + ("metatranscriptome_annotation_record.json", "nmdc:MetatranscriptomeAnnotation"), + ("metatranscriptome_assembly_record.json", "nmdc:MetatranscriptomeAssembly"), + ("metatranscriptome_expression_analysis_record.json", "nmdc:MetatranscriptomeExpressionAnalysis"), + ("nucleotide_sequencing_record.json", "nmdc:NucleotideSequencing"), + ("read_based_taxonomy_analysis_record.json", "nmdc:ReadBasedTaxonomyAnalysis"), + ("read_qc_analysis_record.json", "nmdc:ReadQcAnalysis"), +]) +def test_workflow_process_node(workflows_config_dir,record_file, record_type, fixtures_dir): + """ Test the WorkflowProcessNode class. """ + # load all workflows for both metagenome and metatranscriptome + wfs = load_workflow_configs(workflows_config_dir / "workflows.yaml") + wfs += load_workflow_configs(workflows_config_dir / "workflows-mt.yaml") + + # NuclotideSequencing workflows have no type + if record_type == "nmdc:NucleotideSequencing": + wfs_for_type = [wf for wf in wfs if wf.collection == "data_generation_set"] + else: + wfs_for_type = [wf for wf in wfs if wf.type == record_type] + assert wfs_for_type + wf = wfs_for_type[0] + + record = json.load(open(fixtures_dir / f"models/{record_file}")) + + wfn = WorkflowProcessNode(record, wf) + assert wfn.process.type == record_type + + +def test_data_object_creation_from_records(fixtures_dir): + """ Test the creation of DataObject objects from records. 
""" + records_path = fixtures_dir / Path('nmdc_db/data_object_set.json') + records = json.load(open(records_path)) + for record in records: + data_obj = DataObject(**record) + assert data_obj.type == "nmdc:DataObject" + assert data_obj.id == record["id"] + assert data_obj.name == record["name"] + assert data_obj.data_object_type == record["data_object_type"] + + data_obj_dict = data_obj.as_dict() + assert data_obj_dict == record + + +def test_data_object_creation_from_db_records(test_db, fixtures_dir): + db_utils.reset_db(test_db) + db_utils.load_fixture(test_db, "data_object_set.json") + # db_utils.read_json("data_object_set.json") + + db_records = test_db["data_object_set"].find() + db_records = list(db_records) + assert db_records + for db_record in db_records: + data_obj = DataObject(**db_record) + assert data_obj.type == "nmdc:DataObject" + assert data_obj.id == db_record["id"] + assert data_obj.name == db_record["name"] + assert data_obj.data_object_type == db_record["data_object_type"] + assert data_obj.description == db_record["description"] + assert data_obj.url == db_record["url"] + assert data_obj.file_size_bytes == db_record.get("file_size_bytes") + assert data_obj.md5_checksum == db_record["md5_checksum"] + + data_obj_dict = data_obj.as_dict() + # The db record will have an _id field that is not in the data object + _id = db_record.pop("_id") + assert _id + assert data_obj_dict == db_record + + +def test_job_output_creation(): + outputs = [ + { + "output": "proteins_faa", + "data_object_type": "Annotation Amino Acid FASTA", + "description": "FASTA Amino Acid File for {id}", + "name": "FASTA amino acid file for annotated proteins", + "id": "nmdc:dobj-11-tt8ykk73" + }, + { + "output": "structural_gff", + "data_object_type": "Structural Annotation GFF", + "description": "Structural Annotation for {id}", + "name": "GFF3 format file with structural annotations", + "id": "nmdc:dobj-11-xh82sm39" + } + ] + for output in outputs: + job_output = 
JobOutput(**output) + + +def test_job_creation(fixtures_dir): + job_record = json.load(open(fixtures_dir / "nmdc_api/unsubmitted_job.json")) + job = Job(**job_record) + assert job.id == job_record["id"] + assert isinstance(job.workflow, JobWorkflow) diff --git a/tests/test_nmdcapi.py b/tests/test_nmdcapi.py index 3b32973f..646e4c62 100644 --- a/tests/test_nmdcapi.py +++ b/tests/test_nmdcapi.py @@ -3,8 +3,8 @@ import os -def test_basics(mock_api, requests_mock, site_config): - n = nmdcapi(site_config) +def test_basics(mock_api, requests_mock, site_config_file): + n = nmdcapi(site_config_file) # Add decode description resp = {'description': '{"a": "b"}'} @@ -14,18 +14,17 @@ def test_basics(mock_api, requests_mock, site_config): assert "metadata" in resp -def test_objects(mock_api, requests_mock, site_config, test_data_dir): - n = nmdcapi(site_config) +def test_objects(mock_api, requests_mock, site_config_file, test_data_dir): + n = nmdcapi(site_config_file) requests_mock.post("http://localhost/objects", json={}) - fn = "./test_data/afile.sha256" + fn = test_data_dir / "afile.sha256" if os.path.exists(fn): os.remove(fn) afile = test_data_dir / "afile" resp = n.create_object(str(afile), "desc", "http://localhost/") - # assert "checksums" in resp - - url = "http://localhost/workflows/activities" + resp = n.create_object(test_data_dir / "afile", "desc", "http://localhost/") + url = "http://localhost/workflows/workflow_executions" requests_mock.post(url, json={"a": "b"}) resp = n.post_objects({"a": "b"}) assert "a" in resp @@ -38,8 +37,8 @@ def test_objects(mock_api, requests_mock, site_config, test_data_dir): assert "a" in resp -def test_list_funcs(mock_api, requests_mock, site_config, test_data_dir): - n = nmdcapi(site_config) +def test_list_funcs(mock_api, requests_mock, site_config_file, test_data_dir): + n = nmdcapi(site_config_file) mock_resp = json.load(open(test_data_dir / "mock_jobs.json")) # TODO: ccheck the full url @@ -56,8 +55,8 @@ def 
test_list_funcs(mock_api, requests_mock, site_config, test_data_dir): assert resp is not None -def test_update_op(mock_api, requests_mock, site_config): - n = nmdcapi(site_config) +def test_update_op(mock_api, requests_mock, site_config_file): + n = nmdcapi(site_config_file) mock_resp = {'metadata': {"b": "c"}} @@ -70,8 +69,8 @@ def test_update_op(mock_api, requests_mock, site_config): assert "b" in resp["metadata"] -def test_jobs(mock_api, requests_mock, site_config): - n = nmdcapi(site_config) +def test_jobs(mock_api, requests_mock, site_config_file): + n = nmdcapi(site_config_file) requests_mock.get("http://localhost/jobs/abc", json="jobs/") resp = n.get_job("abc") diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf new file mode 100644 index 00000000..b58b2402 Binary files /dev/null and b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf differ diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bin.info b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bin.info new file mode 100644 index 00000000..ebee12d9 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bin.info @@ -0,0 +1,16 @@ +mbin.py : 1.0 +metabat2 : 2.15 +checkm-genome : 1.2.1 +gtdb-tk : 2.1.1 +hmmer : 3.3.2 +prodigal : 2.6.3 +pplacer : 1.1.alpha19 +fasttree : 2.1.11 +fastANI : 1.33 +mash : 2.3 +sqlite : 3.39.2 +samtools : 1.6 +EukCC : 2.1.2 +metaeuk : 4.a0f584d +epa-ng : 0.3.8 +Python : 3.9.12 diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.lowDepth.fa b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.lowDepth.fa new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa new file mode 100644 index 00000000..a6fddc55 --- /dev/null +++ 
b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa @@ -0,0 +1,100 @@ +>nmdc:wfmgas-13-56028x05.1_32164_c2 +GTAGAACATATTGCCGAAGGAATTGGCGGAATACTTGTCCTGTGGCTATGGATGGCCTTC +CTTTCCGATAGCTGGCCATTCAAATAAATGTCCCAAATAAGACAAGTCGCGGACGCTTTT +CAGCCTAACAAGGTGTTTTCGGCGAATAGCGTCCGCATTCTCATTTTTACCCAGATAGTG +ATCTTCCTGCTGATCTGGATGCTGTCGCCCACGGTGTTTCTTCCTAAGCCCGGCGAAATA +ATCACTGCGTTTTCTAACCTGTGGATGAATTACGGTCTGGGCAACGAGCTAATTACCAGC +TTCACGCTCAACCTGGAGGCCCTGGCACTATCCACGGTCGTTTCGCTGCTGCTGGCGTAT +TCGACCGTGGTGCCATTTATGCGGCCGATTGTGGCCCTGCTAGGAAAGCTGCGATTCCTT +TCACTGGCCGGACTGAGTTTCTTTTTTACCATGATGGCTTCAACCGGCCACGAACTGAAG +CTGTATTTGTTGATGTTTTCGGTCACCGTGTTTTTTGTCACCAGCATGGTCGATGTCGTG +GCCAGCGTCCCAAAAGACATCTGCGACCTAGCGCGAACGCTGCGCATGAGCGAATGGCAC +GTTGCTTGGGAATGCGTGATTCTGGGCCGGGCGGACCAAGCTTTCGACGTGCTCAGGCAG +AACGCCGCAATCGGCTGGATGATGATTAGCATGGTTGAAGGGATCTCGCGCTCGGAAGGC +GGCGTAGGCGGCTTACTACTTAACCAGCAGAAGTATTTCCGCTTGGACGCTGTGTTTGCG +ATTCAAATCTGCATTCTGTTGCTCGGCTTGGGCCAGGACTACGCCATAGGAGCGATAAAA +AAAATATGCTGCCCATACGCAAGCATGACATTAGAGCGGAAATAAAAACTTGGGATGAAA +TGGTAACGGAACTTAACAGGCACGCAGAAGCCTGCCCCACGTGTGCAGCCTGCAAAAAGA +TTGGAGATACACGTTCTTGTTGCGAGGTCGGGAAAAAGA +>nmdc:wfmgas-13-56028x05.1_269582_c1 +GATCGTGATCGCGCGTGAGCTGCTCGCAGTCGCTGCGCAACGCGCGGGCGACGGCTTCGA +GCAGGTCGGTTTCATCGATCGGTTTTTCCAGGAAATCGATCGCGCCCTGCAGCAGGGCCG +TACGCGCGATGGCCGCGTCGCCCTGTGCCGTCATCAACAGCAAGGGCAGGGAGCCGTCGG +AGCGCATCATGCGCGCCTGCAGGGCCAGACCCGACATGCCGGGCAGGCGGATGTCGACCA +GCGCGCACGCGGGTCGCTGGCACGCGGGAGCCTGCACGAAATCCTCGGCGCTGGCAAAGA +CGGTCGTGCGGTAGCCGTGCAGGCGCAGCAGCAGCGCAAGCGAGTCGCGCACCGCCTCGT +CGTCCTCGACGACGAATACATGGCCGGCATTGGGAATCTCGTTGCCCGACACTTTTTATC +CTGCGCTGCCCTGCAATGGCAGGTCGAGACAGAATGTGGTGCGTGCGCTTTCCGCGTCGA +ACCACAGGCGCCCTCCCTGGCGTTCCGCGATGGATCGCGACAGCGCGAGGCCCAGCCCCA +TGCCGCCCGGCTTGCTGGTAGCCAGTGGCGCGAACAGGCGCGCGCGCACTTCCTGCGGCA +CGCCCGGTCCACTGTCCTCCACGACGACGCGCACCGCCGGCCCCGATGCGCCACTGGCGC +GCGCCGCCTTGAGATGAATCTCGCGCCATCCGGTTTTTCCGCCCAGTGCGTCGCACGCGT 
+TGGCGACCAGGTTGCCGAGGATGGCGCCGGTCTGCACCGCATCCGCACACACGGGCGGCA +AGCCGGGTTCGACGCTGACGCGCCAGTGGATGTCTTCCCGCAGCAGGCGGTCGCGCAGGT +GCGCAAACGTGTTGTCGAAAAGGCCGGCGACGTCGACGCTGTCGGCGTGCATGGTGCCGG +TGCGGAAGAACTCGCGCATGCGCCGCAGGTACTGGCCGGCGCGCGCGCTTTCGAGCGAGA +TCTGACCCAGGGTTTCCAGCAGTTCGCGCTGCTCGTCCG +>nmdc:wfmgas-13-56028x05.1_269581_c1 +TTACGGCGGTCTACACCACCCGCAAAGGAAGCGACGTTATTACCGTGACCTCAACGGCCC +CGCCCAATCTCGGGCAATGGTCGGCCCCGCAGAATCTCGGCATTGTTGCGATTCATATGG +CGCTGCTCAACACGGGAAAAGTACTAACCTTCGAATATCCAACGGGCCGCAACGGAGGCC +CGACGCCGGCGCGATTGATCGATCCAGTGGCGAACACAGTAACGGATGTGACCCTGCCCT +GGCCGTACGATATTTTCTGCGCGGGCACGTCGTTCTTGTACGACGGGCGGTTATTGATTT +CGGGCGGATTGGACGACTTTCATTATCCAGCCGACTCGGGCATTGCGAATACGACTTTCT +ACAACCCGGCGACTAATACCTGGACACAAGGGCCAGCGATGAACCTGACGCGCTGGTATC +CAACGACGGTGCCGATGCCCGATGGAACGATCTTAACGGCTTCAGGCACGGCCAACGATG +GCGAGCACATTCAGTTCCAGATGGAGTCGTACAACATCAATACGAATACCTGGACGCTGC +TTCCGGCATCGGCGAACATGCCCCAGCCGAACGATACTTATCCGCTGCTGACCGAAACAC +CGCAAGGCAAACTCTTCTACTCGGCGCCGCGCATCAGCAACCTAGGCGGTGAATTGTATG +ATCCCAAAGCCAAGACGTGGTCGTTCGTCAGTAATTTGAACTGGGGACCGCGTGGACATG +CAGCCACCGTGTTATTGCCGAAATCTTCGCAGGTCATGATCGTGGGCGGGGGGGCGGCGA +AAAATGGCAATGGTGAGCCGACGGCGACGACGGAGATTATTGACTTCTCGCAACCGAACC +CGCAATGGGTTTATGGACCGTCCATGAACATAGCGCGTTACGACCACAACCTGCTTTATT +TGGCAGACGGAACATTGATTGCGGTGGGCGGAAACCAGAACTCGGAATACAGCAATCCAG +TGTTCCAGCCTGAGCTATACAATCCGGCTACTGGAGTGT +>nmdc:wfmgas-13-56028x05.1_269580_c1 +GGATTGCACATCGTCGAGCTGTACGCCGGGTATCGTGATCGAAAGAACATGCATATCCAC +GCCACAGTCGTTCATGATTTGCACTCTTCCCGGCCAATCAATGAGGCCATTTCGCACGAA +AGGCACTCGGTCCAAGAATTTCAAATAATTTTGCTCTGGCAACTCCTCTACAGGAATCGT +CAGGCTCTTGGTGTGAGCCGACCACGTCTCGGTTGCGTAAGCCTCTTCGACGGCGATGAT +CCGATTGGTTCTCGTTGCTATTTCGTTTGGCATGGTCGCTTCCTTGGTCGTTGAGGTGGG +TATCTCCTCGTCCATGCAACCTTTCGGATGATGTGCACCCCGGACGCATTCGGGCTCTTA +TGCGCTCAAGCCATGGGACAGACGACTCCCCTACGACCTGGCGATTCGGGACATGTCAGC +CAGGGGTCGCATCCAATCTCTCGGCCAGGAATGCCCCGATCTCTCCCAGCGCTTGGTCCG +CGGCCGCCAGTTTGCCTACCCCGGACACAAAGCCATGCGCCATTCCTTGCCAAACATCGA 
+GCCGGGCATCTACGCCCGCGTCGACGGCGCGCTCCACATAGCGCAGCGCGTCATCCAGCA +ACACTTCGTCATCGCCCACGTGCACGCGAACCGGCGGCAGACCAGCGAGGCTACCGTAGA +GTGGCGAGACTTTCGGGGCCGTCGGGTCAGCGCCGTTCAGATACGAGCTGATAAGCTCAC +GTGCTTGAGATTCGATGAAATACGGGTCAGCCGCCGCCCGTGTCTTCCACGTCTTTCCAG +TCAAGGTCAAGTCGGTTACGGGGGAAAGTGCTACCGCACCGACCGGACCCACGCGATCCG +CGTCAGTTCGGTCTGTCACGAGACGTAACAGGACGAGCGCAAGCCCACCGCCAGCGGAAT +CCCCGACCACTGCGATCCGGCCGAGATTCCGCTCCACAAGCCCGTTGTAGCACGCTTCCA +CGTCAGTTAGGGCGGCCGGAAACTCATGTTCGGGTGCGA +>nmdc:wfmgas-13-56028x05.1_269579_c1 +AACAAGTCAGCGGCAGCGGGACTTGGCGCAACCCGCGGCGGTAGCGCCGTTTCGGCTAGA +TCTGGCCGGTTTCGTTGGGATTTTTGTGGCAGGGCTCATCGCCTACTGGCCGGCGCTGCA +CGGCACGCTGCTGTGGGACGACAATGCGCACATTACTTCTCCGGCCTTGCAATCCCTCCA +TGGCCTATGGCGGATCTGGTTTGAGCTGGGAGCGACGCAGCAGTATTATCCGCTGCTGCA +CACGGCCTTCTGGCTCGAGCACCGGTTCTGGGGCGACGCCGTCGTCGGCTATCATCTCGC +CAATCTTTTTGAACATTGCCTTTCTGCCTGCCTGGTGGTTCTGATTGCCAAACGCTTGAA +GCTGCGTGGGGCGTGGCTGGCGGGATTTGTATTTGCGCTGCATCCGGTGTGCGTGGAGGC +GGTGGCGTGGATGTCGGAACAGAAGAGCACGCTTTCGGGTGTGTTTTATCTCGCCGCCGC +GCTGGCGTATTTGCGCTTCGACCGCGACCGGAAAAAATCGCAGTATTTTGCCGCGTTGGG +ATTATTTGTGCTGGCGCTCGCGAGCAAGTCGGTTACGGCCACACTGCCGGCGGCGCTGCT +GGTGGTGTTGTGGTGGGAGCGAGGCAGGTTGGAGTGGAAGCGCGACGCATTGCCGCTGGC +TCCGTGGTTTGTGATTGGTGCGACTTCCGGCTTGTTCACGGCGTGGGTGGAAAAGACGTA +CATTGGCGCGCAAGGTGCAGGCTATGAGCTAACGCTCGCGCAGCATATTTTGCTGGCCGG +GCGGATTGCGTGTTTCTACGCCGGGAAAGTTCTGTGGCCGGCGGATTTAATGTTTTCCTA +TCCGCGGTGGGATGTGGATCCGCGGATGTCGTGGCAGTGGATCTATCCAATTATCGTTAT +CGCTGTGGCGATTGGGTTGATCGCGTTAGCTCGGAAAATGCGCGGGCCACTGGCGAGTTT +TCTTTATTTTCTTGGAACATTATTCCCTGTGCTCGGTTT +>nmdc:wfmgas-13-56028x05.1_269578_c1 +CGGGAAGTAACTCGCGTGCAGGCCGCCCGTGCGTAGAACCATCATGCCGACCGCGACGCT +CAGCATGCCGGCGACCATCACCGGCATCGGCCCAAAGCGCGCCACGAGCTTGCGGGTCGT +GCCGAGCGAGAGCACCAGCACCGTCAAGGTCCAGGGCATGAAGGCCAGCCCGGTCTCAAC +CGGCGTGTACCCCAGCACCTTCTCTAGATAGAGGCTGCCGAGAAACCAGACGCCGTACAT +GCCGGTGACCAGGAAGCCGCGAGCGACGCTCGCGCCCATCAATCCGCGTAGCCGGAAGAT +CCGCAGCGGCATGATCGGATTCTCGATCCGCGCCTCCAGCGCGAGGAAGGCGCCCATCAG 
+CAGACCGGCCCCGATCGCGGGCAGGATCACGCTCGATGAACCCCAGCCGTCGTTGGTTGC +CTGGATGATCGCGTAGACGGCGGAGCCGATAGAGAGCGTGACCAGGATCGAGCCGAGCCA +ATCGATGCCCTGGCCGATGCCGAGTCCCTTGTCATTCGGGATCAGCGCCCGTCCGGCGAT diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa new file mode 100644 index 00000000..e02951bc --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa @@ -0,0 +1,500 @@ +>nmdc:wfmgas-13-56028x05.1_31_c1 +AAAAGGCCCCCGGGTACCCCCCCCCCCCCCCACTGCTTGATAAAAGCCGCCTCCGGTCGC +CCCATTCATAGGAGGTGTGCTGATACTCATGCGCTATCTGCGAAGTACACAAAACAAAAC +TACGCGAATATGACTAACTTTCTAGTCAAATTTTTAATAAAATGAAACGCCAAACAAAAA +AAATATATATTTGCTGCGCCCACTCTCTCGCAACCTTTACTTTGTCCGGCCCAGAAGAAA +AAGCAAGCTTGCCTTCAGCTGGGACGCTTAATGTCTGAGTCTCAGAAAAGACGAATAGTT +TAATTATGATAAAGGCCATGAAATCAGGGGCCTTTGAGTAGCCAAGTGGGGCACAAGGAG +GCGAAGCGCATTATTTATAATTTTCTGCGCTTCGCCTACCCCGTCGCCTGATCCTTTTCC +TTACACTTTAAATAGTTTACCACCATCTATAATGCGAAAAACTACGATTGGCTTGAGCAA +GTTTATGAAGATCATCCCTTTTTTTACGTGCAATCCCCATCTTTTGGGAAGCATCCAATA +TCTCATCAGATAAACATTGATCTAAGCTCATGCTCTTTCTGTTCATACGTCGTTTGGCTG +CTGCTTCGAGCATCCAACGAATGGCTAAGGTTTCTTGACGATTTGTCGCTATAATAGATG +GTACTAATTGAGTAGTACCAGAAATTCTCACTTTTTTCACTTCACAAACAGGCTTGACAT +TTTCTATGGCATTGACCAAAAGTCTTAATATATCGCCATGATGAGCTAGACAATGAAAAG +TTTTATAAACAATAGCACGAGATCTCGTTTTTTTACCATCAATCATGCAAATATGGACCA +ATTTTTTTATTAATTGTTTTTGTTTACTATTCAAGCCCCGGATATAACTCAAATTTTCTG +AGCAATTGTATGTGCTTGCCCCACGACCTTTAGGTCTTGATGGCATATGCCCTTCCAGGG +CATAGCATAAATATCTACGATGCGATAAGCGAAGCCCGAAAAAGCTTTCTGAAAAGAAAA +ATAGATTTTTTCCGCTAAATGACCAAGTTTTTAGCCAATTTTCATTTTTTTTCATTCTCG +CCTTTTCTGAAAGTCTTGCTTGGTGAAACCAATCAGAGGATGAACCAAACACAAAGCTGA +AATTCAATGATTTGACAAATAAATTCATATAGAATCTTTGGGTTTTTTTGTACCATATTT +AGATCTGCCTCGACGTCGACTCGGTATTCCCTGCAAATCTTTAACTCCTCGAATACAATG +GTATTTCACACCTGGCAAATCTTTCGCTCTACCTCCTCTAACCATAACAACAGAATGCTC +TTGCAAATTATGGCCTTCGCCGGGAATGTAAGCAATTATCTCATTTCGATTGGTTAAACG 
+TACTTTGGCTATCTTGCGTAAAGCTGAATTAGGTTTCTTCGGTGTTCTCGTCGAAACACG +CAGGCATACTCCTTGCTTTTGGGGACATTGAGTTAGAGCTCGAGTACGTTGTGTGCGCCG +TTTTGACTTTCTACCATGACGAATCAATTGATTTATTGTAGGCATATTTCACTTACTTGG +TTTTTTTTAGAAAGTTGCATAAAATTTTTCTTTCCCAATTCTCACGCTTTATTCGTTACG +GGAATGGAGCCTTTGGCTACTACATCCTGCGGCCTTTCATGGCTATGCTTCCCACGATTC +TTATTAACTATAAGAGCACAAAAAAAAATAACAAGAGAGGACGGTTAAGAACCAAAATAA +AGGGCGAAGAGACTGAAAAAAAAGTATTTTTTCAGTTTCTTTTATCCTTTTTTAGACAAA +AAATTCCTGCGTGCACTAGAAAGCTCTTTTTGCTTGTCTTTCAGAGCAAGCATAGTAATT +GTAAGGAAGGCTATCCCTTACGGGATTCGAACCCGTGTTCTCGCCATGAAAAGACGATGT +CCTATACCCCTAGACGAAAGGGACAAAATTACTTCGAATAAAAATGTTCGGCCAGAGCTG +CGCTGCGATAAAAAGAACTGCGGCCTGTGGCTCGTTTATGCGCCACTTTTCTCTTTCATC +TACGATAATTCATGACGCTTTCAACTAAAAACAAAAACTCTTCACCTAATTAACATTACA +CCTACACCGAGAGCAACCTTCAATAACGTCTGCATTTTCCCTTTTTTAATAAGGCCAAAA +TTTTGGGAGAAATGAGCAAAAAAAATATATATATTTTTTTTCAATTCTCAAAATATTTTT +TCATATATATATGTTTTTTCTTTGTAGTAATCTAGCTTACATTACACGTAGTGCTTAAGA +ATAATCAACTTGTGCTTGTAGCTCAATCGGATAGAGCACCAAACTACGGATTTGGGGGTT +GAGAGTTCGAATCTTTCCAAGCATGGGCCTATCCATCAAATTATACTAAGAGAATACATC +TAATAAGTTTAAAGTAAGTAGTTCCCGTATTTTTTTCTATCGTCTTGTCTTGTAAGAGCT +AGCGGAAATAGCTTAATGGTAGAGTATAGCCTTGCCAAGGCTGAGGTTGAGGGTTCAAGT +CCCTTTTTCCGCTTGAGTTTTTCTTTACCGAGTTTTCCTTTGTAATTCCACAGAGACAAA +GTTATTGCGGTCGCCAGCGGAGCGAGCCGAAGGCACCAGGAGAGAGTGACCAATAAAACC +GGGGGTTAAGTTGCTTGGCCTTGAGGCCGCAGCTTCAAATATTTGAAATAAAAATCTAAA +AAAAATATTTTTTTAATCATTTGTCACGATTTATTTTTTTAATCATTTGTCACGATTCAG +GGCGCAGGTGCAGCCATCTTTTTTCGCGCTGCGGCGGCCATTTCCCTGTGAAACGCGGTG +AAACCGGTGCTGCGCCCACATTATTCGCATAACAAATGATGGGGCTGGAAACCGGGAGGA +TGGAGTAATGAAACGGTACGGAGAGTCATTGGAGGCACAGTGCTTTCCTTGTTTTTAGCA +AAGGCTAAATAAAATATCTGCGAAAAAAATTCTTTTTTTTATTGCGCCCCGCGAAAAGCT +ACGCTCTGTGGCCTTGCTCGAATTCAGCCTTAGATTCAATTCAGACTTGCGGATTATGCA +ATTATTATGGTGAAATTATTATAGTAAACTTATAAAAAAAAATTTTTATTTATTTTTCCA +CATACAACTTACAAACATAGACTTAGTGCCATTTGATGGGTGACTAAGAATAGAGGAGAG +GGGTATAGAAAGAAAAAACTGATCAGAAATAAAGTAATTGCTAGTAGTAAGGATTTTTCA +CGATCCATTGGTTTATATAGAATCCATTTTTTAGGTGTATCAAAATACATTATTTTCACA 
+AAGCGTATATAATAAAAACAACTAATAACACTAGTAACAACTCCTATTAAGGCCAGTAAG +TAAGCCCCACAGCCTAAAGCAGCAAAAAACAAATAAAATTTGCTACAAAATCCGGCTAAC +GGGGGTATTCCTGCGTATGAAAACATAGTAATAGACAAGGTAATAGCTAAAATAGGATTA +GTTTTGGCTAGAGCACCTAAATCTGCTATATATTTGAAATGGTTTTGACGTAATGCTAAA +ACTATGGCGAATACATTGATGGTCATTAATACATAGATAAAAACACCAATTAGTAGCGAT +TGAATTCCTTCTATGGTTCCACATGAAAAACCAATAAAAAGATAACCTACATGTCCGATA +GAACTATAAGCTAAAAGTCTTTTGACTTTGTTTTGAGCCATGGCGGCCAATGCTCCTAAA +ATCATAGAAGCAATGCTGCAAAAAAAGAATAGTTGTTGCCATGTTGGATCATAAAAACTA +TAAATAAAAACACGTACCATATTGGCAAGGATAGATATTTTAGGTGCAATAGAAAAGAAT +GCTGTAACCAAAGTAGGTGAACCCTCATATACATCAGGTGCCCACATATGAAAAGGAACT +GCTGTGATCTTAAATAAAAAACCTACAGCAATAAATAAAATCCCCATAAAAATACCACTA +GATTGAGCACCGAATAAAGTGATTTCATATCCAGTGAAAATCTTAGCTAATTCCTCAAAG +TTGGTAACTCCAGTAAATCCATAAATCATAGAACAACCAAACAATAAAATTCCGGAGGAG +AATGCACCTAAAATAAAATATTTTAAGCCGGCTTCTGTGGAGAATTCAGAGTCTCTTTTT +GATGCTGCGATCACATAAAAACATAAACTTTGAAGCTCAATAGCTAAATACATGGCAATT +AAATCATAAGCCGATATCATGAATAGCATACTGCAAGTAGAAAGTAGAATTAAGACAATA +GATTCAAAAGCATTCAAACTCTCTTCTTTGAAATAACCCAAACACATAACTATAGTGCTA +GCCGTACTTACTAATAGAAAGATTTGGCAAAAATATGTAAAATTGTCTATTATTAAATTA +TTATAGAATAAATTGGCAACAGTTAAAGGTGCGCTAGAGGCCACCAATAAGATGGTTATT +AGATACCGATAGGATTCTTACCCCCCGGTCAGAACCACACGTGCAAGTTTCCTTGCATGT +GGCTCGTCCATGATAACTTCTTCAGGTCTACGGATCAATACCCCCTAAGGCTGGGCCTGC +ATAAGTGAAATAGAACACTGATCATGTTCGGAGTTGGCGTTATGCCATATTGTCTTCCAT +TCCTTTATTAGTTACGTCCATAGGTAGATTAATTAAATTTGTTGCTTTGCCCAGCTGTTG +ACCTAGTCTTTTATTCAGGGTCGGTATATTAACGTAGGAAGTCTCATTAATATGAGGCTG +AAAGTGGTACTCAGCGAAAAAAATATTTTTTTTTCCCAGTCAGCCAGCCTTTAATGGCAT +TATCCTAAATTGCTTCTCTGATTTTGCTATACTTTCCAGACGCGGCACGCATCCGCATGT +TTCCAGTACAGCGCATTATCACCTACCTCCCTTTCTTCCAAGCCTTTCACTCTAAAGGGC +ATCGTCGTATGCTCGACATTAATTGCCCGGTGTATTTCTTGGGGTACATTATGGAAGGAT +CTGTCACCCGTACATTTTTGGCTAAAGCCTGGGTCTCCAAGGGATTTTCAAAAGTATTTT +TGAAAATCGTAGCAAAATATTTTTTTTTTGCTACGCCCTGCTTTTCCCGGTCCCTATAGA +GTCCATTTGGGTGATCCCTAGTTTGCGCGTTTGGCCGTTTGCTCGTCGTTTAGTTACGTG +TACCACTCTTCCTGTTTATTACAGTTACATCCGGAGGGTTGCTCGCACCTGAGTAGCGTT 
+CGAACCCACGTGCCTTCCGGCCCCGCCTTTCGTTGGGGGAAGTTACCTTACTCCTATGCG +TACGATTGTTCTTGCGACGAAACGCGGATAGTACCCATGCTATGGTCCCCTGCGGTCTGA +TCCCACATAAGGTTAGGGAGTCTACCCGAGCGATTGTTTTCAACCTTCGTCTTCCCAAAC +GCACCCTCCTAGGCGCACAACACTAAGTAAACCAAGCCAACTCACATTACACACTAATGG +TGGATAATCATATTTTTTAGAGGTACTAAATACAACTCCATAAATAAGCAAAATGATGGT +TGCGTTAATAAGAAAGATCTCTGGGAAAAGCGCTAAAAAATCATGTTCAAACATTTCTTC +AAACCTCTTTTTTATGTTTTTTAATCAAATTTTCCATGTTGCACTAAGTTACTTACGGAA +GTATGCATACACTCTAGGAACACTTCAGGGTAAACACCCATCCAAATAACTCCAACAATA +AAAGGTAAAAATATTAGAACTTCTCTTCTATTCAAATCGGAGAATTTTTGGAGGAATTTG +GGTTTGAAATTCCCAAAAATCACACGATTATATAGCCAAAGAGAATAAGCTGCGCCTAAA +ATCATCCCAAGTGCCGCTAATGTGGCCACTAAGCTATTTCTTTGGAAAGCTCCTACTAAA +ATAAGAAATTCCCCAATAAAGCTGCTAGTACCGGGTAAACTCATATTGGCTAAGGTAAAG +AATAAGAAAATGGTAGAGAACATTGGCATGGTGTTGACCAAACCTCCATAATATTTAACA +AGTCGAGTCTTATGTCGATCATATAAAACACCAACACACAAGAAAAGGGCTGAAGAAACC +AGTCCATGACTTAACATAAGTAAAATACTACCTTCAATTCCCTGTATGTTTAGACTGAAC +ATACCAATAGTCACAAAATTCATATGAGCTACCGAAGAGTAAGCAATAATTTTCTTCAGA +TCGATTTGTCTTATTGTAGTCAAGGAAGTATATATAATAGCAATCACGCTTAAAGTATAA +ATGAAAGGAGTGAAATAAAGTGTCGCTTCAGGAAACATAGGTATAGAAAATCTTAAAAAA +CCATAGGTTCCTAATTTTAAAAGAATCCCTGCCAATATTACAGATCCAGCCGTAGGTGCC +TCTACGTGAGCTTCAGGTAACCAAATATGAACTGGTACCATAGGCACTTTTACGGAGAAA +GAAGCAAAAAAAGCAATCCATAGTAATATTTGGCGCCGCTCACTAAATTCTGTGGTTAAT +AATATTTGTAAATCAGTGGTTCCTGTTTGGAAGAAAATAAATAAAATGGCTAAGAGCATG +AAGACAGATCCAAGTAAAGTATATAAGAAAAACTGATATGCTGCTTGTATTTTTCTTTGT +CTAGAACCCCATACCCCTATGATAATAGGAGAGTAAGGGATAGGCCCTCTGCTTTGTCTT +CCCAGGATAAGTGGGTCGAGAAAAGGCTCCTGTAGGTACCTACTCCCTTCTCAGAGAACC +GTACGTGATACTCTCGCATCATACGGCTCCGTCTCGAAATAGCTGATGAGTCAGAAAGAA +GGACCAAGCAAAAAAAAAATATATATATTTTTTGCAGAAAGGGCTTTACGCCTTTTTTTG +GCTTGTAGTTTTTTGGGCAAAAAAAAATATGTTTTTATTTGGTCTCCTCGTTTGTTCCAG +CAACTCATTCTGCGGCCTTGCCAATCGCTTCCCGACGAAGGAATTGACCTGAGGCCTTCT +TCCAAGACCAGGACGATTATCCTTGTCAGCTCAATAGGTAGCTGACGAGTTTACGTCCAT +CCCTGGGCGTCGGGTACTGTTCCCTTCCCCGCCACCCTTGTAGCCGTCACCCGTGATTCG +CGCAAGTGTGTCTGCATCCCCTAGACTTGACGAAAACTGTTTTCTCGCCGCAAAACTCCA 
+TTCTCCCCTGCCTAGGAGCGCCCTTTCTTGCATGTTTGTACAATACTTGAGTAGGCGGAG +TGAAGTCCTGCAAGGTACTCACTCAGAGTACCCCCCCAATCCCAAATAGGTCATCCGACG +GGTTCAACCAAAAAGCTATGCTTACACACAAGACTACCCTTCTCCGAAAGCTTCGCGGGG +ACTACTAGACTTAGATCCGCCCGAAGGGGTTTACTGCATAAGGCTCCTGCAGAGGCGGCG +CTTCGCGCCGCGAATATCAGATGCTCAGCTCCGCACAACATAGGGATTAAAACGCTTTCA +AAAAAGACATAAAATAGTAAAAGATCCAGCATGCAAAACACAGCAATCATGAAAGATTCA +CAAATTAAAAACGCTATCATATACTCTTTTTTATAACTTTTAATACTGGACCAACCTACT +AGAATGCAAATAGGAATTAAAAATGTGGTCAGGACCACAAAAAATAAAGAGATACCATCT +ATACCTATATAAAAATTGATGTTTGAATAAGGAAGCCATCGAATAGTTTCCACGAATTGA +AATTTAGCTGTGGAATTATCGAATTGTATCCAAAAAAGAAGGGAATACAAAAAAGTAATC +AGAGAAGTGCACAAACCAATACTTCGTATCAGTCGTATTCTGAAATCAGGGATAACAAAA +AGAATAATGCTTCCTAACAAAGGACACAGAATAAGACCACTGAGATTGGAATAAAATGGA +GCTAAAAATTGTAACATAAATGTTTACTCTTTTTCCAAAAGATCCCGAGGACGCAGGTCC +ATATTTCTTCCTGGCTTTCCAGCCATAGCGGCCCTATGGGTATATCATCATCACCCCTGC +ACTTATCGTACATAAAGTCCGCAAAAATTGCATAACTTGATGGGCTTTTGTACGCACATA +CTATGTAATTGCATGTTTCGCCTTTCGCTGCTTTGCCCAACGCTTTACTTTAGGGCTTGC +TACTATGACCGGACGGCCCCAGTCAGGGTCGGAGGGATAAGAAAAAGCCGATTGGGCAAA +AAATTTATTTTTTTTGCTTTTTAGGCTTCGGGCCTCGGCCCTCCTTCTCAGCAAAGCCGA +AAAAAGGGCGTAGCACTGGCTTATCATTGCATCCCTTAAAGTTTCATGGTATTATATGAG +GAAGTATGGCTTTTTAGTTCGTGCTAATGTCTTTTTCAAAATGAATAAATAGAAAACTCA +CTATATAAATAAAATACAATCGATTATCTACCCAAAAAGAAATAAAATCCCACAGACCTA +TTATGGTAATAAATATGGTTAAGCCAATTAACATCACAAAAGCATAATGATAAACAAAAC +CACTTTGAAGTTTACTTATCTGCTTGGCTAATTTTCGAAATGTGTACGAAATCCCATAAG +GCCCCAAGATTTCAATAGCACCCTTGTCTAAAACCTTAAATGAGACTTCATATCCGAAAC +GCAAGAAGAATCTAACTATAAAGTCATTAAAAATTTTATCAAAAAACCAGCGCTTATTTA +AAAAGCAATATAATCGATTACCCAAAGTACTAGTTTTCGAAGCGAAAATGAATTGATTTG +CTACAAAATTTATATTATACGCTATAAAAGCACCTAAAGTACTAAATAAAATAGGAATTA +ATTTGATAATTGTTGGAGTAGCAAACTCAGATTCGGCAAGAATTTCATTTTTTGGTAGTA +TAAAAAGGGAATTAGCCCAAAAATTGGTACCTAAACCAATCATCATATCGGTCCCTAAGC +CGTTCCATTGCTGGAACGGCTTGGCTTCAAAACCGTACGTGAGGCTTCCGCCTCATACGG +CTCCTCTCAGGATTTTTCCGTCTTCCCGCTTGTTTTCAACATGGCAGTGCTTGTCCAGAA +GTTAAAGGTTGTGTTCTACGAACACGCAAGGATTTCACTTTTATGTGGTCCACTTTTATT 
+TCCTCGTGGTTTTCTAACATCTTTGGACAATTTATTTAAGAGCCCTTTCAGGAGTTTGGC +TACTGCATTTGTAACCTCAGATTTCAGTAGATAGTATTTCCGTTGAAGGTGTGCAGCGGA +ATAGAGAAGTCAGGACAAAGAAAGAAATATAGATTTTTTTGCTGTTGCGCCCCCTCTTCA +CGCGGCTCGTAGTTTTATTGGGCTCTTATTTTGTTTTGTCTTAATGTGCTTGTGGCTAGC +TATCCAACTCAGTTTGACCAGGTAATATTCTTTCTTCACCCCAAAGGCCGTTGTGTAAGA +GTCGTACAAAATCCAGTTATCTTGGTATACTTTCCCTTGGAAGAGCCAGCTTCTCTTTTC +GGGGAAGTACTTTTGTTTGATTTTATCACGACCCCATGTCGGGTGCCGTCGGTATACCCA +TGCTCGGATCTTTTGAAAGATGTCATGGTCTAATCTCCGAAATATATTACTGCATTCAGA +GTATTTGAAGTAGTTGCCCCATCCCACTATTTTGGGTACCAGGGTTATAATAAGTTGGTA +GGCTGCTTTTCCTTTTGAAAATTGAATGGCCCGTCGAATATCTTCGAGAAATCTCTGGCG +AGATTCACGAGACGGAGTTATTAAAGTTCTCACTTTGCCCCATTTCCAGATATGATTAAT +TGAAAACCCTATAAATTGGAACCCGGACCCGGTGTTGACTATCTGGATCTTATCTGAGTG +CAATTCTAGTCCCATGCACCTTAACCATTCCCTCAGGAATGTCTGAGCTTGTTCTATGAC +CTCCCTGGATTGGTGAAGTATAACGAAGTCGTTGGCATATCTAACCAGTATCGCAGTTGG +TTCTTTGGGATATGCTTTCAGTGACCACTCTGTTAAAGCTGTCTCCATCCCATGGAGAGC +AATGTTGGCTAGCAGTGGGGAGATGACGCCACAAGTGCCCATATTCCTGGTTTCCGCGTA +ATTTCCTAATCCGGTCATGAGGTCGACTTTAAGCCAGGCTTTGACCTGTTTGGCTAAACT +AGGCATAGTTTTTAATTTATTTAAGAGGGCTTCGTGGTTGATGCGATCGAAACATTTGGA +AATGTCCGCGTTCAGAACATACTTGGGCTTGGCTCGAATAGCTAAGAATATAGCTTTGAT +GGCATCCTGGCAGCCTCGTCCGGGTCTGAATCCATAGGAATTGGGTTCGAAGCGGGCTTC +CCATTCAGTTTCTAGGGCCATCTTGGCCAAAGCCTGCTTGGCTCTGTCTTCGATAACAGG +GATAGGCCAGGAAGATAAAAAGGCGCGAAGTTTAGTTCGAAAAAAAATATATAGATTTTT +TTTTGCTTTACATTTTCCGGGCGCAAAACGTGCTCGATTGACTCTACGTTTTCTTGCGCG +CAGCGCAGAAAAGGCGCAACAAAATCTATATATTTTTTCTGCTTGTAGTTCTCTGGGGCG +CTCTTTTTCCTTCCAGGGCTTTGATATTCTTATTCGACGAATGGGCGTAGCCTTCCCATC +CAATTGAAGACCTCGTGCCATCTCTATTTTTTGTGACCCTGAGGTTACCATCCTCTTGTA +AATGTCAGGGTCCTTTTTCCCTGGGTTCTCCTGTGTAACTTGTCTCACGGCCAAAAGTCT +GGCGTGTAGATTTCGTATAAGTCTAGATTGTAGAGCACGCATCTTGTCCATATCGTTGAA +GCGAGAAGCTTGGTAAATCCTGGTTTGGAGTCTAAAAACAACTGCCTCGACCTTGGGCCA +AGGAATTTGTTCCCAGGGTATCGTTTGGGTATCTATGTAGAACCTACTCATAACTCATTC +TCCTTTCCAGTTTTTACTGAAATCCTAAATCTCCAAGTAAGCATAATAAAAATGCTGCGA +AAATAAATATATTTTGGCTGGCTGCGCCCTGCGGCCTTGGCTTTTTAGAGAATCCTGCTC 
+TCGTAGGGCTTGTAGCTTGGCAGCCCGCCACAAGGGAGCCCTACCAGAGTCTTCCAGTTT +CGAGTTTATTGGATAGTTATAGCGGCCCATAGGCGCAAGATGTACTTTGCGGGGTGGTTC +CTTGGACATAGTCCTTTCAGACAGTGGCCGTTTAGTCCATGGTCCATTGGATGTTCGGTG +CAAGACCAAAAATTTGCACTGCAAGTACCCCCTAATTATTCCCCCTCACTATAGGGCCCG +TCCCTCAGTGTGGTCAGTACTTGATACTGTCAGGCAGCAAAGCTTCCACTTGTTTACTTA +TGATGGTTCACCAAGGCCTCTTTCCTCCTCCCTTTTTTGCTCACTTATGACTCAGAAGCG +GCCAGACCTCCTACAAAGGAAGAAGAGTCGACTGAACATCTCAGCCATTGGCGGGAATTT +CGCCCGCATCCAATTCTCAATTATCGTTCACCCAACACGAAAAAAATATATTTTTTTCGT +GTTGGGTGAGCAACAGCCACTTCGTCACAGGAGTGACTTATGGGCTAACAGGTCACACTT +TGGCCACGTATCCTACAAAAATACTTCCAAAAGCTAAAAAAATTAAAGGAATTGCCATAA +GAATGGGCGCATCATGACATCGTAAGATGTCTCGTTTGAATGAATTTGTTGGTGCTAAAA +AAGTTAGAAAAAGTAAACGAAAAGAATAATAAGAAGTAAAGAAGACAGAGACACTTCCCA +ACCAGAAAGCAAAGTTACCACTAATCGTATATTTAGTATAAGCGAGCTCTAAAATAACAT +CTTTAGAGTAAAATCCAGTACAAAAAGGGAAACCTATTAAAGATAAGCTTCCTATAAGCA +TCATAGCATAAGTAAAAGGTAACAAGGAAGCAAGCCCTCCCATCTTACGCATATCTTGCT +CATCCGACATGGCATGAATCACTGAACCTGCACTCAAGAAAGGTAATGCCTTGAAAAAAG +CGTGATTCATTAAATGGAATACGCTAACGGAATAGTTTGAAATGCCGCAAGCAAAAATCA +TATAGCCTAATTGACTACAAGTTGAATAGGCTATGACCCTCTTTAAATCGTTTTGTAATA +TTCCAGTGGTTGCTGCGAAGAATGACGTCATAGCTCCTATAAAAGTAATAACAATCAAAG +CAATGGGTGAATATTCGAATAAAGGAGAGCACCTTGCTATCATAAAAACGCCTGCTGTTA +CCATAGTAGCTGCGTGAATCAAAGCAGATACTGGAGTGGGCTACTCTTTAATAACCTCTT +ACGCTTTCATACGGCAAAAAAATAATATTTTAGAGCATTGGGTAATAAGCTGATGAGCCT +AGCAAGAGTAGGGACTGGATCTTCCACTTATGAAATAGAGACTCTCCCAAGAATCTTACG +GATGGCCGCTGCGCCCTTAACAACTAAGATGTTTTTTCTCTTTTATACATGAGACGCCAT +CTCAGCCCCATTCTAACGCTTTTCGAAAATATATATATATATTTTGCAAAGCAAAAAAAA +ATATTTGGGCTTTTTTGAACTGAATCCTGGTAGATCCAGCGCGCAGCGCTTTGGTAACGA +TGACGATAAAGCTGAGCTCAGGCCGAAAGTTACGATGTAACACCAGGTTGCGCTGGAAAA +AAACAATAGATATACTCTTTTATTCCGCTTCTTTCAAATAAGGCTTATAACCAAAATGAT +AAGTATTTGATTTGATACAAGGTTTCTTTACATGCCCAAACCCTATACGGATCCTTCTAT +TCCATAAAATCTGCACATCTAGATTATAGAATCTAGAAAAAAATGATTAGACCTTAACGA +CAAAATAGTGCTTCGTACCTTAGGTAAATCTGGCCAGCTTATCTTTCCAGGGATAAATTC +CATTTTGAACAAGAGGTCTCACGTACAGTCTCTGAGGATCCTATAGAGCTCCTTCAGCCT 
+TTACTTCGCTGGGTGGATTTGGCCTTCCTTCATAGGTTTCCTGCTGATTGGTCAAAATGA +GAGATTTTTCCAATAAGGACTCTCATTCGGTCGACGATTCCAGCATACAGTGAGATTGTT +AAAATGTACCTAATGGCACATAAGAGCCCTCGAATATTTCAAAAACCCTCCATTGCATCA +GGTAACCAAGTATGCAATCCTATTTGTGCAGATTTTCCAACAGCGCCAATAAAAAGTAAA +ATACAAATAACAGTTATGGCATGAAATCTCATATTGCAGAAAATGAAATAATGATGGGGT +TCGGAAAAGGCACTAGCACAAGCAAAAATAGTCGAAAAGTCTACTGTTTGAAAGATAGTG +AAACAACCCATAATCCCGAGAGCTAATCCGAAATCACCTACTCGATTGACAAGCATAGCT +TTTATAGCTGCTTTATTGGCTTGAAGCCGTGTAAACCAGAAATTAATTAACAAATATGAA +GCGAGACCTACTCCTTCCCATCCTAAAAATAATTGAATAAAGTTATCTCCAGTAACTAAC +ATTAACATAAAAAAAGTAAAAATAGATAAATAGCACATAAATCTAGGGCTATGCGGATCC +TCGGACATATATGAAATAGAATAAAGATGAACTAAGCTACTTACAAATGTAACCACAATT +AACATAACTACAGTCAAACTATCAAACACAGAGTCAAAATTTTTACTTTACTGGGGCCTT +AAATCGCAGAATCAAACGGTAAATTTTTCGCGTACCGCAATGCGGCGGCCATTCACCCAA +GTGAAATAATAAAGTGCTCCCCGAACCGTGCGAGATGGTTACCCATCACACGGCTCACCA +ACTTGAGATTGTGATTAAGGCTTGGGGTAACTACTGTATGTTTAAGCAGGCTGCAGCAAA +AAATCTATTTTTTTTTTATTCGCTGCATATTTTTTTTTCATTGCCTAGTCTCATTCGAAG +GTACAGTGCCGCTTACCTTTGCCACTCAAGCGTACGGCATCTGCCGACTTAGGCCTCTTC +CCTTTTGCCGATCTCAGTGTTTTCTCAAAAAAACTCGCAGCAAAAAAATTTTTGAATATT +CGAAGCTGCGCCCTTTCGAGTAGGCGGGTACTACGAGCCCTCTGTCCCACACATCTCTAT +CTATAAAAATGTGTGGTTCACCGGTTTCACCGAATACTCTATTGATGTCGAGACATAGGT +GCGCTTGAAGTGGTGTGCTGTCCTTATTGGACTCAGGCCCCCCGTTTGTTCGGGCATATG +CGCCCTCTTGCTGCCCATATGCAGGGGGAAACGCCGTGTTATCTAGCCTCTATTACATAA +GGCCTCGCCTGATGCGCCAAGTTTGGGATACCTCCTCCACCTTACGTTTGTGACCTATGG +CCTCACCTGTGTCTCAAAGACACGGTCGGGGCACAGCCAAGGATATTTTTGGCTACGTCC +AGTATGCCCTTTTCCCGACATGCTATGATGCTCTAAGGGAGTTCACCCCATAGAGTGAGT +CGAGTCCGTCTCACCGCAGAGCAACTGCAGCGTCCGGATAATCTATTTATCCAGCAATTC +ATCCAGTGACTTCACGGTCGCCAAAGAAGCCCCAAGAAGCATCAAACATCTCGGAAAAAA +TCCATGGAGCAATTTTTATATAGCAAGCACTAGCTCCCAGTGCAACTTCATAAAAAGCAA +TCAGAGATAAAATAAAAGATAATGAAACACACGTGGTTGTGACTATAGCAGTTCCTCGTA +AACCAAGAAAACGACCAAAAGCACCTGCTACACAACTACCTAATAAAGGTAAAGTTACAA +TTAATAAATACATACATAAATCATTTTTTTTTTTATTGTGACCAAAATACAATCGATTGG +GTAGATAATTGATTGTATTTTGGGCGACAGCCGCACAAGCCAAGACTTAGATGAAGGCTC 
+TTTTAATTTTAATAAATTGACGACACAGCCTAACAAAGCGAGTTTTCTTTTAGCGCATCC +AATAGAGTTAGCCGCATGCCATTACTACATAACGACATAATTGAAAGATAGGTAATCTTA +GATCACTCCATTTTATTAGTAATCCACTTATCATCTGCAACAAGTATCAAAATTTTGTTT +TAGTAAGTGCCTTCATTATGCAAGAACTACGGAAAAAAAATCTTTTTTTTTCACACAGCG +GGCGTAACAGGCGTGGCTACGCCGCTGTTATCACTCTATCGGTCCTTGTGGAAGCTGATG +GCTTATGCAATCAATATTTTGCTTGGCAAAAGCTCATAAAGCCGTTTGGATAGTAGAAAA +GAATAAGGCGGACAAAAAAAAAATTTATTCTTCTCTTCCACTTTAGTCGCAAAGCAATTC +AGCAGGGAAGAAGAATAACCCCTGGCTAAACGAAGCCTTTATTTGCTTTACGAGGACGGC +GGAAAATAGGGGGCTGGGGCCCTTAGCTTCAAGCACAATTGCGGAACCATAGAATTAAAA +AAAAATATATATATATTTTTTTTTACTTCTTCGCCAACTTATTATCTCTTACTATATTAT +ATTATATAGATGGCAAAACTACGTAGAACAAAGCTCAAAAATTTTTTATTCCAACCTTTG +CACTTTATTGCTTTTCTCTAGCCTTCAACGAAAAGAAGCATTCTCTTCTTTTAGTTCTAA +ATAAAGTCAATAGAGGCTTACTTTTTTATTGGAGTATTCTGCCTGTCATACAAAAGTCAT +TGCCATTGCCAAGGTTTTTGTGACTATGGTTAGATTAAATTGAGTCATTTTTTCGGCACT +ATCTCAGATTTAAGATAGACTAGTATAAATCAATAAAGAAGGGGTCTCTACACACCTCAA +GGCAGAGGGCGCAGCAAAATATAGATTTTTTCAAATATTTGAAAAAATGCCGCAGGTCCG +GCCGAGCCGGAATAAGCCGGGGGTGTCTATAAAATGAAATGGTGAAAAGAAAACGACACA +GAAAGATTGTGTTTCCACTCTGCGCTTCGCTTCCCTAAGCTCACTTGGTGAAAATGGTAA +ACACGACAGACTTAAAATCTGTTCCTATGGGTTATCGGTTCAAGTCCGATAGTGAGCATA +CTCGCTTGGTGAAAATGGTAAACACGACAGTCTCAAAATCTGTTCCTATGGGTTATCGGT +TCGAATCCGATAGCGAGTATCTTAATGTCTGAATTGAAAAGAAGGGCGAGTATAACTTAA +TAGGTGAAAGTGTCAGATTGTGAATTTGAAAACACGGGTTCGAATCCCGTTATTCGCCAA +AAAAACAGATCATCCATTAGCTTAAATCTTTACAATCTTTGAGGGCGCTGCTTTTCGCAG +CAAAAAATATTTAAAAAAAAAAGTTTCGCTATAAACTACGTGCCCCAGCTCTGTCAATCA +AGCCTGCGGCCTTGATTAGCAAAGTGGGGCGTTACTGGGTAGTTAGCCTCTGGTGGTGAC +GTCACCCCATTGGATCCTTCCCTTTCTTCTCGTATAGTGGATTGAGCATAAAAGGATGGG +GCGTTAGACATTGCGCCCACGCTTGGATATCATCAGCAAAAATGGCTGATGGATCACTCT +GCTCGTATTAGCTGGTCCAAACAAGAACATAGAGAAGTATATGGGTAAAAAATGAGCTCA +AAAAGTTGTTGAAATACTGATGTTGTTTCTAAATTAGCAGCTTTTTTTATATCCATATGA +TTGACCGAAGTAGATTGAAGTTGTCCGTATAATAAAACTAGATTTTGGTTGTAGAAGGCT +GAAGGTAACAATTGTGACCATGTATTATTTGGTGCTTCTTCAATTAAGTACAAGATACAA +GTGGGACCTGCACTAGAAGCGAGCTGCTCAATAAAACCACCTTGCTTGTTTTTATGTGGT 
+AGTTTTCCTTTGTAATTTGGTTGAAATAAAGTTCTACCTCGAAAGGCACACAATATATTT +TTGAGTTGTCGCCATTGCCGACTTGTCAAGCCACTACAATGGAATAAAAGAATATATGGA +GATTTTTTTTCAATCTCTTGGGCTTTTTTCCGTATAACAATTTGTTTTATTAGCATAGGA +TCATTATTATTATTTTTTTTCTAGTTCCTTTTTTTCTTATTTTTCAACATACCTTAAATT +TGAGTAAGTGATGCCGGGTTTTTTTTTACATCGAACAAATGCTATGTTTGTTAATATGCT +TTATGTTTTCTGAATAATAAACCGCGTAACACAAATAGTGGAGGTGAGGTCAACCTTGCG +TCGTGCTACACGACAATTTTGTTAAGGCTTTATTCCAAGCAGCTGCCTTCCGTACTGGCA +GCTTTGACAAAAGGATCACTTCGGGAATATATTCGTCTTGAATTCATAAGACTAGGAGTT +TGAAAAATTGTATGACAAGACATCGGCTCAAATAGCTACCCAAGAGGAAGCTCAGTGAAT +CCCTAGACCTACAGGTGATGCGTACCGTGTAACAGAGAAATATTTTTTCTTCAAACCTAC +TATCTTCAGCCATACTAATTGCCCCTTCTTGCTTTTCTGAACCTTTGGCTAAAAGACACA +AGGCTCGGCCCCGGCCCCATATTCCAGCTTATTTCCGCGATAGGAGCATTTATGCGTTTC +CTGATGATGAAATTGAAGCCTGCGTTAAAGCCCTGCTGCGCCCTTCAGCCTCAACAAGTA +TAAGAAGTAAGTTGAATCTTTTAATTATCCGCCAGGGTTGAAACTACTTGCGCGGCCTGA +GCTATTTAACCTTCACCTCGCTTTCAGTTACTTTGTATCCTTATATGGAAGCCCTTTATT +ACCAAATACTATGGATATATGTCCCTGCTATTTTCTTCTATTCCTCAAAAGCTTCTTATA +GAATTAAAACTTACTTAGTGGGTGTTTTTCTTCTGTCCATGGCTAATAAGGTGTCACGTC +CAGAATTGTTATGACATTTTATAACTTTCTTTAGGGCTTTAAAAGAAGCCGAAACAGCGC +CTTCGGCCCTGCCCTTCTCATAACTACTTGCCCTTCTCATAACAAACAAAAAAGGAGGCC +CGACAAAGGGCCGAAGGCGCCGGCCACCATCCAGGCAAGCTTTGAACTTTCGCCAATAAA +TAAAGGGCTTTAACTCTGGCGCATATTTCACAATACCGCACAGAAAGGCGGCGCTGCGCA +CCCTCCCTTAGAACTACTAAATCTCCCTACTCAACCTAATAATGTTGCTTGTTTTAATGG +CTTTTTCTTTATTGCTCGAATAGCGGATATGGCAAGAGCGCGCTATATTGCTATATATGA +TGATTCATAGGATTTATTCCTTAGATATTTGACTAAGCTCCGAGCCTAACCAATAATAGA +TTCTGAACGGGTGTCCTTCGTCCGGCCTGCTTTGTTCCATTTCATAGCGGGGCTATTTCT +GTTTTATATTACTTTATGTGACGGTTATATGTCATAGAGTGGACAATCCCATCATCACCG +CTATGAAATGTTTAACCTGCCCAATTTTTTGGCAGCGGCAAGAGCAAAGACAGGAGCTGG +CCTTTTACCAGGCCAGCTTATCTTTATAATCAAGTATCTGGCTCTTTTTATAACTTTTAC +AAGGTTATTTTTTAAATGCATTTCCCTATTCATTCAATAAGGTGTAGCTGTCAACAACCT +ACACTACAGTTAGGGTCATTAAGGCTGAATGACATCTGGTTCTTTTTTCTTTATTTAGGG +CTTCGCTCTCTAAGGCGATAACAGAAGCCATATAAGCTTCTACTATAACATCGCCATAGG +CATACGCTACAGAATGCTTATAGGCTCTCCTAACCTCGAGAACCACGTACAAAAAGGATT 
+TGAGAAAAATAACGATATATCCGTACACAAGCTTTTCAACCGTACTTCCTTCGCGTAAAA +TAATTGCCAATCAAATATAGAGGCAAGATTGAACCAGGGAAGGGCGCCTCCTTTCACATC +TTTCTGCAGCATCCTTCGTTTTTCGACCGAAAGACTTACTTAGTACTCGGCTAAGTTTTG +TTTGGTTAGCTGTAAGGAAGCCTCTTTTAATGTCCCGTCATACTGCACCCGAGAAAAGCA +AAGAGAATACTACTTAGGTATAAGCACGCGCTTGCAAGCTTCGCCCTTTATTTTATAGTT +AGTTTCCTGTTGGGCTTTATGTACGCTTTAGATAGAATTCTATATCTGCTTTTGCGTCAT +AAAATGGGATCTTACATCTTCGTTGTTGCGATATCGCGATGCCCAGCACGAACCTGTTGT +TTTTTGAACCAGGTTAGTAATAAAAGCAATTCAAAAACTGTGAGTGCGAATAGTCTTTCC +TACTATACTAAAAAGCTTGATAAAATAAATAGGTTTCCGTTTTGGAGACGAGAATAATAA +TAATTAGCCCAGAAGGAATAATATCACAGAACAAGTGTTTTGTGTACCCAAATTACGCAA +ATGCATAATAAAGCGGTATATTATAAATGGCGATATATTTGCAGCAGTCTTCGATTAGTA +GGCCGTAGGAAGTTCATTTTGGTTGAGTTAGCACTTCTTGCTGGCCCTCTCACTCTGGAG +AGAGAGCTGGGCCCTGCTGCTGGCGTCATCGATGTACATTATTACATTTCGAATCAAATC +TAATTTTTCTGACAGCGTACGATTACTATAAGCTAAGGTTCCTGCTACTTTCATTAAATT +AAAGTTCGGTTCGTTTTCCAAAACCTCATAACAATCCAGCAGGCTCTTGAATAGACACGA +TAATAACCCTAAAAACCATGATACTTAAATGAGAACACATCTTAGAAATGCTATAATTTC +ATTATGGACTGTCCAATTTTTTATGTCAGAATAATCATTTTGGAACACCCTCTAAAATCC +AATGATGAATAAAAGATGGTCCAGAAAGAAATACCACAGTTGTAACTATTCTTACAGTAA +TTTTAAAGATAACAAAGGGCAGCCAAAAGATCCAGTAGCTTACCATATTTTTAAGTACTC +TACCAGAGAATCCACAGAAATGTAAAGAACTTTTTTTATAACAATATTCTATGATAAATT +TTTTTTTACTTTCCGTAATTAAAGCAGATCGCAACAATCTCTGGTTAAGAGGTTCTCTCC +GGAGAGCCCTGGGCGGGCTCCCTTGCTTGTCCCCCACCCGTGTGGGTGTGGGTGTGGGGC +AGGTCTCCACTGACGAAAACAAGTATTAATCTTACATTTTTGCGCTTCGCACATACATAA +AAAACAAGAAGTAAATAATTAGTACGAGTAAGCTCAACAAGAGCCGTAAGCCTTGTTTCA +AACCTCACGGTGTTTACACCTCTCGCCTATCAAAGTGATGGTCTTTCACCTTTTATGAGA +ACCTGTATAAAGAAGGATTTCCCGCTTAGATGCTTTCAGCAGTTCTTCTATACCAACTTA +GCTACCCGGCGCTGCTATTGGCATAACAACCGGTACACCATAGGTTGACCCAACCCAGTC +CTCTCGTACTAGGGTTGGCCTCTCGCAGTTCTCTTTTAAACACCAACGGTAGATAGGAAC +CGAACTGTCTCACGACGTTCTAAACCCAACTCACGTACCACTTTCATCGGCGAACAACCG +AACCCTTGGGACCTTCTTCAACCCCAGGATGTGATGAGTCGACATCGAGGTGCCAAACGA +CTCCGTCGATAAGAGCTCTTGGGAGTCATCAGCCTGTTATCCCCGGCGTACCTTTGATCC +GTTGAGCGAGAGCCCTTCCACGCGGGACTCCCGGATCACTATGGCCGACTTTCGTCTCTG 
+TTCGACTGGTAAGTCTCACAGTCAGGCAGGCTTGTACCATTACGCTCTAAAGCTGTCAGA +ATATTAGCTTGAGCCTACCTTCGCACACCTCCGTTACTCTTTGGGCAATTATGTTCATTT +ACTCTGCAACATACTTGTCGTTGTATTTGATTCAATTGAAGTGCTTGCTAATATCTTTAT +TCCTTTTTCTTTAGTCATTTTGTTCATTTTTCGTATGGTTATTAGCAGTTTAAATGTCCT +ATTACTGTTTCGTAGTGAACAGTGCTTTAGGCTTCCAGTTTACTGGAAGTTCAGACTGTA +TCATTATCCAATCATTATTGCATTGGATACCTGGCATGCAGTCGTTGAGGGAGCATAAAT +AAATTTTTTTTATTTTCGCTACAATGTAGTGGCTTTTTTCAGCCTTCCCTGCTGATTGTC +CAAGGAATGGAGATCTCAGCATATAGCCAGATTGGCCTGGGATGTTACCATCCAAGCGCC +CCGATTTAAAGGCATCCGCCCCAGATAAACTAACCACCATGCAATGTCCCGCCTCTTTTT +CGGCAGTTAGACATCCTTAGACGAAAGAGTGGTATTTCAAGATTGGTGACGACGTTGCAC +GTCACCACCTCCCACCTATCCTACACATTCAATCAAGGTTGTCACTGCAAAGCTATAGTA +AAGGTGCACGGGGTCTTACCGTCTAGCCGTTGGTACTCCGCATCTTCACGGAGAGTTCAA +TTTCACTGAGTCCATGTTGGAGACAGCAGGGCAGTCGTTACACCATTCGTGCAGGTCGCT +ACTTATGCGACAAGGAATTTCGCTACCTTAGGACAGTTAGAGTTACTGCCGCCGTTTACT +GGGGCTTCCATTCGAAGCCTATAACACTTCTCCTTTTCACCTTCCAGCACCGGGCAGGTG +TCAGACTCTATACATCGTGTTACCACTTAGCAGAGTCCTGTGTTTTTAATAAACAGTCGC +TACCCCCTGGTATGTGCCGCTTTCCTAATCTAAGGATAGGAAAGCACCCCTTCTCCCGAG +GTTACGGGGTCATTTTGCCGAGTTCCTTCAACATGGTTCTCTCAAGCGCCTTAGTATACT +CTACTTGTTCACCTGTGTCGGTTTGGGGTACGGTTCGTTTTGCACTGGAAGAATCAAGAT +TCTCCCAATTCCACCAAGTTTTTTCCTGGAAGTCCATTGGCCTGCTTGATCATCCAATTC +ACGAGTTAAAGAAAAAACCCGTGGTTTAGTAGCCAAAGCAACTTCGTCACTTTTGTGTAC +CCATCGGATTAAGCCCTTTCGGGGGTTCCTTAGGGACCGATTCACTCTGCGTAGATTTAC +TGAACGCAGAAACCCTTGAACTTTTGGCGATCATGTTTTTCACATGATTTATCGTTACTC +ATGTCAGCATTCTCACTTCTGATATCTCCAGGTGTTGTCACCAACAACCTTCTTCGATTT +ACAGAACGTTCCGCTACTGACACTTGAAAAAAAAATATTTTTTTGCACTTTATTATGGGT +GCGCTTTGCTTACCCGAATAAGCCCCACTGCCGATACGCATAAGGGTACTGTAAGCGCGG +AGCATGCAATTACGCAGTAATTGCATATGGTGGGTGCTTTCAAGGTCTCGTCGCTTCGGT +GAATCACTTCAGCCCCGATACATTTTCGGTGCTATGAAGCTAGACCAGTGAGCTATTACG +CTTTCTTCAAAGGATGGCTGCTTCCAAGCCCACCTCCTGGTTGTCATCGCTCGATTACTT +CCTTTTCCACTAAGTGATTGCTTAAGGACCTTAGCGTACGATCTGGGCTGTTTCCCTCTT +GACTTCGGATCTTAGCACCCAAAGTCTGTCTGTACAAAATCATGGCCAGTATTCGGAGTT +TCCTTGGGGTTGGTCAAGCTTTGGGCCACCCTAACCCATTGAGTGCTCTACCTCAGGCCA 
+TAAACATTATACGCTCTACCTCAATAGATTTCGCGGAAAACCAGCTATCTCCGAGTTTGG +TAGGCCTTTCACCCCTAGCCACAAGTCATCCCCGTATTTTGCCACATACGTGGGTTCGGT +CCTCCAAGGCCCGTTAGAGCTCTCTTCAACCTGCTCATGGCTAGATCACTCAGTTTCGGG +TCAAATAGAAACAACTATATTCTTTATTTACTTTCAACTTCATTGCGCCTACACCTAATG +GCTTAAGCTTGCTGTGTCCATTTACTCGCTGACCCATTATGCAAAAGGTACGCCGTTAGA +GTGGAAAGTTCTGAATAACGAGGCGCAGAAGAGAACGCAATCTCTTCTTTCCCGCTGTTC +AGACTTAGCCTGCTTTGTCCTTCGACTGATTGTTTGCATCGGATTTCAGGTTTTCTATTG +CACTCCCTTTCTTAGGGTTCTTTTCACCTTTCCCTCACGGTACTTGTACGCTATTGGTCA +TTGAGGAATACTTAGGCTTAGAGGGTGGTCCCCCTTGGTTACATAAGAGCAATCATAATT +CAAACACAGTATCCGCGTTTTACTTATCGAATTGAACCATAGGAAAAAATCTACAGGGCT +ATCACCTTCTTTGGCAAGATTTTCCAACCTTTTCACAATTCCTTGAATGGTCTTTCCATC +ATTCAATTTCAAACAAATTGAATGAAGAGAGAAGGCCGCAGGAGAGCGCGCAAAGCGCTT +TCTCCTGCGGCCTTCTCACAAAGCCTAATCCGCTTTCGCTCGCCGCTACTAACGGAGTCT +CGGTTGATTTCCTTTCCTTTAGCTACTTAGATGTTTCAGTTCGCTAAGTTTTGAAAGTCC +AAGGCGGAACGCAGCACACTAATGCGCCGCTCCGCTTGGATACGGTTTCCCGATTGGAGA +TCCATGGATCACAGACGGTATCTCCCCATGGCGTTTCGCCCTTGAAAGCGTCCTTCCTTC +TCAATGCCTAGGCATCCATCCGATGCATTATTTTGAATACGGTAGGAATTGCACCTACTC +CACTAGTCACTACCAAAATATTCGCCAATGAGGCTTCTATTCATACTTCAAAATGGTGGG +CATGCCTTCTGATCAAGTCATTTACTGGAGCGCCTTCGAAGGTGCACTCGGGCTTGAAGA +CAAAGGTTTATGCACTTTGTGCATATTTTTACAAAAAAAAGAAAGATAAAGAACCTGATA +GAAAAGCATACGAAAATCCAGGCCACTAAAATAACTTAACCAATCCACCCTTTAATTATC +TTGTAAATTACTATTCCAAATCATCAATATAATACATATCTTATATTTACATATCGCATT +GTTCGCACTACAAGTAGAATCCGGGTTTCCTTTCACCTAAATCCAATAAATGAGCATTCA +CCTTATCATTTAATTCACACCATTTATATAAGTGAACATTTTAATATTTTCATACCTGAA +AGATAAATCAGGAGCGCGCAGAAACAACCAAATGATGGATGCACTGCTTTCTAAATTCCA +CTGTTCAACAAAAACAATTTATAACATTTAGCGGGAGTAAGATTCGAACCTACAACATTC +AGATTATGAGCCTGACGAGTTACCAATTACTCTATCCCGCGCACATAATAAAGGCGGCGC +TGCAAAATATTTTTTTTGCGCAGCAGTTATCCGCGCTGCCCCAAGTATAGCGGTTTATTA +TAAATATGTAATTTTTTTATTGCGAGATTTCTCTATGATGATTCATCCATCGGCGTCCAG +TGTTGCGTAGCAATACTTGGCGGGCTTAGTCCTCAAGGCTTCGCCCCCTTTTAGACGATC +TAGTTCATGACTGCGTGCTTGGCGGAACTTGGTCGCGTGTGCTCCTCATTCGTAAAGCTA +ACTAAGCGGGTCCTTCGGCCCATAAGCTGCGAAGCTGCTTTGTTGGGGGATATTTGTAGA 
+TGCTTTATGGCTCAGTTTATTACGCATGGTCAATGAAATATGTTTCAGCATTATCAGTAT +ATGATGACGGCGCACGCCCGCTTGGAGCCACACCCCGTATTATTATGTATGACGATCATA +TACGATATTGTATTATTATGCATTATTATTATGTACAATATTGTTAGTTTTGTAAATAGT +ATTTATGCTATCGCTAGTACAGAATATTCATTTCCTTTTTTACTCTTACTTTTCCGACGA +TCAAAAAACGAAATCTGCGCCTTCGGCGCTGCTTTTCTTCTTCGGCGCAGCCTTCTTTCG +CAGGCCAGAAAAAGGCACTACCCCCACAAGATAACCTACTACACCTAAATCTAGAGAGTA +TACGGTTCGTTTTGCACCGGATTCTTCCAATTCCACCAAGTTTTTTCTTCGTGCCCTGCC +TGGTCGAATTCGCTTCATGGATCGTATTTATTGAATTCTGTATTTGCTCTAGCGCCCTGG +CAAAAGGAGGTCAAGAAACTATATACGATGAAAAAGCATGGGTTGTTTTTTATTTTTAAT +ATAGTCGTTTGGCAGGCCTATGCTACTATTTTAATAGCGTTTCAAAATAATTTAAATTAG +ACGTTGTCCGGGCTTGGACCATGTCTCCCGAAATTTATAAATCAGTACATATAGCGTAAG +ACGATTTCACATTTCGAGGTCGGAATGGGATCGGGTGTTTTCACGTCTTACCATAGTGCC +CGGAAGCGCGTATCGATTAATGAATAGAGTAAAGAGTGTACTTGCCCAGTCGTAGGTTTC +CTGCATTCAATGGGGCAGACTACACAAGTGCTCTCCGGCGATAGTCGCCCATCACATGGC +TCTCTAACTTGACTTTATTTTGATTTCTTGTAAACCCTTCCGGCCACTCTTTCGAGTCAG +TTGCTTGTCGAAGCGCCCGGCCCCACAAGGCCGCGAGCCACGCGGCGGCTGCACCAAATT +GTTGCCGGTGATGCCGCCGCTGTATGGAATTTTACGTCTTTATCTGATTAGGGGATTTAC +AAGTTACTGCATATATATAACAAGCAGATTTACTTCAGATTCCTAAATAAATCTAATTAT +TGATTATAACTTACATACATAAGGTACGGCCGCTTCTTGGGATGCTTTCAGCGTTGTTTG +TTTTTTACATGGCTTGTCCAATCGCAGGATTGGGCGCCCTGCCCTATGTTATAATGTCGT +TTAGTGCCCCGTCCTCAGTAATATGGGTGCTGCGCTCCCTATCCTGACTTATCCTTTACG +TATGAATAAAGTAATTAAAAGCAAGCCATTTAGACTTAGTTAAATCTATCCTGCGACGAG +CTGGCGAGCAAGCAACGTACATTTGCATGCGCTCCCATTCGCGGAGCTGCAACAATGAAT +CATCTATGATGATTCATAAGCTACTGGCATCGCGGCTAATTCTATTTTGTTGACATAAAA +TGAGAGTATAGCAACTCTTGTTTATGTAGTTTGTGAACAAGATCTATAATTAAACCTAAA +GGTGTACTGTATATGATTGATCGCTTTGTCAGAACAACATTCATTATGTTGGATTTATAG +TTATGCTAAGAAACATCGTAAATCTTTCTTTACCATAGCTTATCATAGAGGGAGATTTAG +GGTAAGGTTAGTGACCAGCAAAAGAACATATATATATCTTTTCTTTGCCTCCCACTATGC +CGTAGGTTAAATTCCTATCGGAAGAGGGATTTGAACCCCCGTGTGCGAATTATGATCCCG +CTGTTCTAACCAACTAAACTATTCCGACATGCGAATTATTATTCCGCTGTTCTACTAATC +TGCCGCTTCCTGGCTGTTGGATGATAATTCGCTTCATGCCCTGTGGCCTGGTCCCGTAAA +AAAGATATGGCTTCGTCATTCTGAACGATAGGAGTAACTGTGTATGTATAGAGTTCAAAC 
+GGAATGGATGATTAAAGCGCCTTATGCCTTGGTCATATTTATAATTGCGACTGAACTTGT +GACTGCCGTTATTATGTAGTAGTAAAGGCATTCACGAGCGTAATTAGAAAGGCATGCAGT +ACTGCGGGGCACAACGGCTCTGACTGACATAGGCCAAAACTTCAATCAAGCCTTTAAAGG +CTAATATTGCAGCCTTTATGCCAGCCGCTTTACCTCCGGGCTATGTAACTATAGTCCAAA +GAATGAAGAGCACAGGGCGTATCTACAATTTTCAGAAAAAGTAAATATTTTTTTTGTTCG +AGAAAGGAGTCAATCCAGCCACAGGTTCCCCTACGGCTACCTTGTTACGACTTCACCTCA +GTCAAAGGCCCCACCTTGGTATCCCACATTAAACCACCAATTACTCTGGTAGACCACACT +CAACAACGGCATGGAACACTGGAATAATGGGTGATCCTTGGTCTGATGCTTCGGGCGAAG +CCAATTCCCATGGTGTGACGGGCGGTGTGTACAGGGCCTGAGTACATATTCACCGCGGCA +TGCTGATCCGCGATTACTAGCGATTCCAACTTCATGTTCTCGAGTTGCAGAGAACAATCC +GAACTGAGGCCATCTTTCTGGATTCGCTCCGCCTTAAAGCCTTGCTTCCTATTGTAATTG +CCATTGTAGCACGTGTGTAGCCCAGCCCATAAGGGCCATGCGGACTTGACGTCATCCCCA +CCTTCCTCCAGTATATCACTGGCAGTTTCTTGTGAGTGCAGTACATGGCATGGATTGTCA +ATTATTTTTACAAAGACTGGGTGCCACATTGTTATGTGTGCCACGCTATGAGAGCTGTGC +TCGTCGAAGTACCCTTCGACAACTGTGTAGTCGTAGCCACACGGTGTTGTGATATCCTTT +TTTAATGTTGTGGTCTTCGTTACTTTTAAGTGCTCAATAATCGATCCTTTAATCCAAAAC +GCATGTCTTAGCAACACAAAACGAGGGTTGCGCTCGTTATAGGACTTAACCCAACATCTC +ACGACACGAGCTGACGACAGCCATGCAGCACCTGTATAAATTTTCGTACCATCCCATTAA +GGACAAGCAAACTTATTCATATGTCAAGGGCTGGTAAGGTTTTGCGCGTTGTATCGAATT +AAACCACATGCTCCACCGCTTGTGCAGGCCCCCGTCAATTCCTTTGAGTTTCAGCCTTGC +GGCCGTACTCCCCAGGCGGAGTGTTTAACGCGTTAGCTGGGCCCCTGATCAGCCATTTTG +CATACAGCGCAGACCAAGGACGAACACTCATCGTTTACGGCATAGACTACCAGGGTCATA +TTGAAGATAGTTTTTCGCCAGCATTGAGCCCTTGTTCAGTGGCTTAGCTGCTTCTCGAGG +TCATGATGGAAAAGAAATAATCCCATCACGATCACGTAGTGATGATTATCTTCAAAAATG +GACTATATCATTCTCTGAACTCCAATCTTTGCAGTTGCTTTCTTTTTTATTGGCTAACTA +TTGGTTAATGCACTTCAAAAAAAGAGGCAGAGCTTTCTTCATTTCAGAGATCCAGCGTTG +TTGAATTTCAGGATCTTTGTAAACCAATAATATGGCTTTCCTGGGCATGCTCGCCACGGG +AACATGTGGTAAGATCATTCAAAACAGTCTCTTAAGTTCCTCAATACCAGACCAAAGGCG +GCAGTAAGACTCTTGGCTTGAATTACCGTACTTTCGATTTTTTCGGATAGGCCCATAAAC +CTTCCTTTTGAAAAGTGTCTAAGTCTCGGGCTAAAAGCTTAACATATTCTTTGTCGAAGT +CATCAGTAGCCACTTGTTCTGGCCAAAGCCATCGACGGCTTGCCCCTGCACGGAACTTGG +TACCAAGATTTCATGCAGTGAGCCTACTTTCCATCCACCGAAAGTAATTTTCCTTCATTT 
+GGCTATAACGAACGGAGAACCTTGCCATCTCTTAGGAGTTATCCTAAGATAATTGATTTC +ATTAAATCCGACAAAGATATTTGCTTGAGTCTCTTCAAGTGCAGGTTAGGAGCTTGAGCT +TTGCCTATTTTTCTTTATATCTTCTATTTCCCTATTTCCATAGTCTCTACGGGGTGCTCT +TGATCCGAGATTAAAGCACTTCCCTCGGGATTGTCTGATGTTAAATGGTACATTCACATC +AAAGTTTCCCCGAAATAGCTGGATGCACACTCTGACCTCGCGATCAGAGGGGACACCTTT +GACCGTGTTTCATTTAGAGCCCGGCCGGATCTATCTAATCCTGTTTGCTCCCTATGCTTT +CGCACCTCAGCGTCAGTAGAGACCCAGAAAGCTGCCTTCGCTTTTGGTGTTCCTTCGTAT +ATCTGTGAATTTTATCTCTACACACGAAATTCCACTATCCTCTATCTTACTCAAGTGAAT +TGGTTTTGAAAGCATTCCGCCAGTTGAGCTGGCGACTTTCACTTTCAACCGGATTCACCG +CCTACGTGCCCTTTACGCCTAGTCATTCCGAATAACACTAGCCCCCCCCGTCTTACCGCG +GCTGCTGGCACGGAGTTAGCCGGGGCTTCTTATTCAAGTCTTGTCACAATCGCACACTCG +ATGAAAGAGCTTTACAAGCTGCGTTGCCCTTCTTCACTCACGCCATATTGCCGGATCAGG +CTTTCGCCCATTGTCCAAGATTCCCCACTGCTGCCTCCCGTAGGAGTCTGGGCCGTGTCT +CAGTCCCAGTGTGGCTGATCATCCGAAAAGACCAGCTAAGCATCGTAGGCTTGGTCAGCT +TTTACCTAACCAACTACCTAATACTACGTGGGCTCATCAAACAGCGCTTTTTAGCTTTCA +TTCATTCAGGATTTGGCCCAAACTGTTTGGCAGATTCCCACGTGTTACGCACCCGTTCGC +CACTTTGTTTTCATTTTGACTCGAAAACAACGTTCAACTTGCATGTGTTAAGCATATCGC +TAGCGTTCATTCTGAGCCAGGATCAAACTCTTTTTTTTAAGTATGATTTTTTACACAGAA +GTAGGTTTTGAACCTACCAAACTTCCAATATAAAACCTCTGCGTATCACTAATTAAATCA +TTATCATACTAAAGTTTACTACCCATAAACCAAAAAGGATTCACTATGTAGAACCTTTGG +TCCAATCAACTTGTTATGCTTTCGCATTACGTAATTACGTAGTGATTGCAAATTGATTGC +AGATTGCCAGTATGCTATGCTGTATGGAGTACCACGGTCTTCACTGTAGGTGGCCTTGTC +TAGCTTTCATCTTTCAATATAAAGGTGAATAATTTTAATAAAGAACAGGGCGCGAAGCGC +ATCTTTGTTGCTCTTGATCCTAATTCCCAAATTCCCTTTTTTGCTGTATCCTGCTTCGAG +GCTCTGCCAACATTGCAATAAACCTAGTTGTTCACAGTGAAGCGAAGCAATGACGTAGAT +TTCGACGAAGGCTTCGTTTCGGTTTTGATAAGAATTATATTTTTGGGCTGTAGTTTTTTG +AGGCTGCAAGATTAATCGCAATTTTGAATCCAAGCCGTCGTTATTTATCTATGATTATTC +ATGGCTATTCCTCATGGAATTACTTAGTTAATAACATAATTAAAAAATTAATAATCTATT diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out new file mode 100644 index 00000000..bbd100a7 --- /dev/null +++ 
b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out @@ -0,0 +1,51 @@ +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Bin Id Marker lineage # genomes # markers # marker sets 0 1 2 3 4 5+ Completeness Contamination Strain heterogeneity +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + bins.24 k__Bacteria (UID203) 5449 104 58 0 16 12 9 2 65 100.00 752.93 5.58 + bins.2 root (UID1) 5656 56 24 0 0 0 5 5 46 100.00 654.08 15.82 + bins.32 k__Bacteria (UID203) 5449 104 58 8 6 33 55 2 0 98.75 159.64 34.29 + bins.14 k__Bacteria (UID203) 5449 104 58 1 49 48 6 0 0 98.28 51.83 37.88 + bins.40 k__Bacteria (UID2982) 88 230 148 4 220 6 0 0 0 97.30 3.38 33.33 + bins.20 p__Proteobacteria (UID3887) 1487 259 162 10 249 0 0 0 0 95.68 0.00 0.00 + bins.1 o__Rhizobiales (UID3642) 107 485 316 17 461 7 0 0 0 95.46 1.74 42.86 + bins.23 k__Bacteria (UID2982) 88 227 146 37 43 57 65 20 5 94.27 138.49 24.88 + bins.11 k__Bacteria (UID3187) 2258 188 117 13 173 2 0 0 0 92.90 0.93 50.00 + bins.12 k__Bacteria (UID3187) 2258 188 117 11 175 2 0 0 0 91.45 1.71 0.00 + bins.21 k__Bacteria (UID3187) 2258 188 117 54 132 2 0 0 0 90.86 1.71 0.00 + bins.28 k__Bacteria (UID203) 5449 104 58 28 16 46 13 1 0 90.50 91.23 60.44 + bins.17 p__Actinobacteria (UID1454) 732 200 117 50 148 2 0 0 0 80.85 0.71 50.00 + bins.19 c__Gammaproteobacteria (UID4202) 67 481 276 117 304 54 6 0 0 79.71 16.20 23.61 + bins.37 k__Bacteria (UID1452) 924 161 108 31 128 2 0 0 0 77.31 1.01 0.00 + bins.22 k__Bacteria (UID2982) 88 230 148 52 157 21 0 0 0 76.58 7.22 19.05 + bins.31 k__Bacteria (UID203) 5449 104 58 47 35 19 3 0 0 74.61 37.38 50.00 + bins.29 o__Actinomycetales (UID1696) 455 311 187 88 135 63 18 7 0 72.12 41.74 12.58 + bins.43 c__Gammaproteobacteria (UID4202) 67 
481 276 170 301 10 0 0 0 67.30 2.81 20.00 + bins.13 k__Bacteria (UID203) 5449 104 58 52 41 11 0 0 0 66.07 17.24 36.36 + bins.33 k__Bacteria (UID203) 5449 104 58 26 52 25 1 0 0 63.79 8.12 10.71 + bins.38 k__Bacteria (UID3187) 2258 188 117 92 95 1 0 0 0 58.97 0.85 0.00 + bins.42 k__Bacteria (UID2982) 88 230 148 122 100 8 0 0 0 55.61 4.73 0.00 + bins.15 k__Bacteria (UID203) 5449 104 58 43 33 20 7 1 0 55.02 39.50 38.30 + bins.7 k__Bacteria (UID3187) 2258 187 116 97 86 4 0 0 0 54.13 3.45 0.00 + bins.16 k__Bacteria (UID203) 5449 104 58 70 24 10 0 0 0 50.00 13.79 0.00 + bins.30 k__Bacteria (UID203) 5449 104 58 65 34 5 0 0 0 48.90 5.17 40.00 + bins.6 c__Alphaproteobacteria (UID3305) 564 348 229 198 148 2 0 0 0 45.45 0.66 50.00 + bins.41 c__Gammaproteobacteria (UID4202) 67 481 276 255 205 16 5 0 0 38.95 2.73 35.48 + bins.8 k__Bacteria (UID203) 5449 104 58 71 33 0 0 0 0 38.40 0.00 0.00 + bins.45 c__Alphaproteobacteria (UID3305) 564 349 230 223 106 20 0 0 0 38.17 6.75 40.00 + bins.46 c__Gammaproteobacteria (UID4202) 67 481 276 306 174 1 0 0 0 37.54 0.36 0.00 + bins.39 o__Rhizobiales (UID3654) 92 481 319 320 157 3 1 0 0 35.08 1.41 16.67 + bins.34 k__Bacteria (UID203) 5449 104 58 84 20 0 0 0 0 24.19 0.00 0.00 + bins.18 k__Bacteria (UID203) 5449 104 58 82 22 0 0 0 0 23.67 0.00 0.00 + bins.10 k__Bacteria (UID203) 5449 103 57 94 9 0 0 0 0 14.91 0.00 0.00 + bins.27 root (UID1) 5656 56 24 54 2 0 0 0 0 8.33 0.00 0.00 + bins.5 k__Bacteria (UID203) 5449 104 58 99 5 0 0 0 0 5.17 0.00 0.00 + bins.47 root (UID1) 5656 56 24 55 1 0 0 0 0 4.17 0.00 0.00 + bins.36 root (UID1) 5656 56 24 55 1 0 0 0 0 4.17 0.00 0.00 + bins.35 root (UID1) 5656 56 24 55 1 0 0 0 0 4.17 0.00 0.00 + bins.3 root (UID1) 5656 56 24 55 1 0 0 0 0 4.17 0.00 0.00 + bins.26 root (UID1) 5656 56 24 55 1 0 0 0 0 4.17 0.00 0.00 + bins.9 root (UID1) 5656 56 24 56 0 0 0 0 0 0.00 0.00 0.00 + bins.44 root (UID1) 5656 56 24 56 0 0 0 0 0 0.00 0.00 0.00 + bins.4 root (UID1) 5656 56 24 56 0 0 0 0 0 0.00 0.00 0.00 + bins.25 root (UID1) 
5656 56 24 56 0 0 0 0 0 0.00 0.00 0.00 +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv new file mode 100644 index 00000000..9f4d96cb --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv @@ -0,0 +1 @@ +No Archaeal Results for nmdc:wfmag-12-fxwdrv82.1 diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv new file mode 100644 index 00000000..f18be971 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv @@ -0,0 +1,15 @@ +user_genome classification fastani_reference fastani_reference_radius fastani_taxonomy fastani_ani fastani_af closest_placement_reference closest_placement_radius closest_placement_taxonomy closest_placement_ani closest_placement_af pplacer_taxonomy classification_method note other_related_references(genome_id,species_name,radius,ANI,AF) msa_percent translation_table red_value warnings +bins.1 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Xanthobacteraceae;g__BOG-931;s__ N/A N/A N/A N/A N/A GCA_019075925.1 95.0 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Xanthobacteraceae;g__BOG-931;s__BOG-931 sp019075925 79.24 0.52 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Xanthobacteraceae;g__BOG-931;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_003164375.1, s__BOG-931 sp003164375, 95.0, 78.29, 0.34 92.75 11 0.9596061802260266 Genome not assigned to closest species as it falls outside its pre-defined ANI radius 
+bins.11 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__SbA1;g__PALSA-188;s__ N/A N/A N/A N/A N/A GCA_003169715.1 95.0 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__SbA1;g__PALSA-188;s__PALSA-188 sp003169715 75.76 0.05 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__SbA1;g__PALSA-188;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_019241735.1, s__PALSA-188 sp019241735, 95.0, 76.42, 0.06 86.89 11 0.9158913290576843 N/A +bins.12 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Palsa-295;s__ N/A N/A N/A N/A N/A GCA_003131985.1 95.0 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Palsa-295;s__Palsa-295 sp003131985 88.57 0.77 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Palsa-295;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_003153585.1, s__Palsa-295 sp003153585, 95.0, 84.66, 0.82; GCA_003167215.1, s__Palsa-295 sp003167215, 95.0, 84.57, 0.76; GCA_013287135.1, s__Palsa-295 sp013287135, 95.0, 79.11, 0.33; GCA_013289715.1, s__Palsa-295 sp013289715, 95.0, 77.79, 0.22; GCA_013289695.1, s__Palsa-295 sp013289695, 95.0, 77.6, 0.24; GCA_003166795.1, s__Palsa-295 sp003166795, 95.0, 77.44, 0.16; GCA_013286165.1, s__Palsa-295 sp013286165, 95.0, 77.34, 0.15; GCA_003156635.1, s__Palsa-295 sp003156635, 95.0, 76.73, 0.16; GCA_003131705.1, s__Palsa-295 sp003131705, 95.0, 76.38, 0.11 92.51 11 0.9840641626029654 Genome not assigned to closest species as it falls outside its pre-defined ANI radius +bins.17 d__Bacteria;p__Actinobacteriota;c__Thermoleophilia;o__Solirubrobacterales;f__Solirubrobacteraceae;g__Palsa-465;s__ N/A N/A N/A N/A N/A GCA_003137075.1 95.0 d__Bacteria;p__Actinobacteriota;c__Thermoleophilia;o__Solirubrobacterales;f__Solirubrobacteraceae;g__Palsa-465;s__Palsa-465 
sp003137075 81.98 0.63 d__Bacteria;p__Actinobacteriota;c__Thermoleophilia;o__Solirubrobacterales;f__Solirubrobacteraceae;g__Palsa-465;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_009785395.1, s__Palsa-465 sp009785395, 95.0, 78.29, 0.28; GCA_017354065.1, s__Palsa-465 sp017354065, 95.0, 77.83, 0.19; GCA_017882465.1, s__Palsa-465 sp017882465, 95.0, 77.42, 0.25; GCA_019240015.1, s__Palsa-465 sp019240015, 95.0, 77.29, 0.16; GCA_019240115.1, s__Palsa-465 sp019240115, 95.0, 77.26, 0.27; GCA_017882405.1, s__Palsa-465 sp017882405, 95.0, 77.19, 0.25; GCA_019239375.1, s__Palsa-465 sp019239375, 95.0, 77.14, 0.18; GCA_019240305.1, s__Palsa-465 sp019240305, 95.0, 77.08, 0.15; GCA_019241635.1, s__Palsa-465 sp019241635, 95.0, 77.06, 0.2; GCA_003244035.1, s__Palsa-465 sp003244035, 95.0, 77.02, 0.19; GCA_019240655.1, s__Palsa-465 sp019240655, 95.0, 77.01, 0.23; GCA_005883415.1, s__Palsa-465 sp005883415, 95.0, 77.01, 0.13; GCA_019244395.1, s__Palsa-465 sp019244395, 95.0, 76.96, 0.2; GCA_019244745.1, s__Palsa-465 sp019244745, 95.0, 76.95, 0.19; GCA_019239445.1, s__Palsa-465 sp019239445, 95.0, 76.94, 0.2; GCA_019247075.1, s__Palsa-465 sp019247075, 95.0, 76.9, 0.16; GCA_003165655.1, s__Palsa-465 sp003165655, 95.0, 76.39, 0.15 68.98 11 0.9747606806435155 Genome not assigned to closest species as it falls outside its pre-defined ANI radius +bins.20 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__GJ-E10;s__ N/A N/A N/A N/A N/A GCA_903871435.1 95.0 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__GJ-E10;s__GJ-E10 sp903871435 77.81 0.28 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__GJ-E10;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_900290335.1, s__GJ-E10 sp900290335, 95.0, 78.27, 0.3; GCA_900290295.1, s__GJ-E10 
sp900290295, 95.0, 78.16, 0.24; GCA_000828975.1, s__GJ-E10 sp000828975, 95.0, 78.02, 0.21 95.89 11 0.9552906587742479 N/A +bins.21 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Koribacteraceae;g__TOLSYN;s__ N/A N/A N/A N/A N/A N/A N/A N/A N/A N/A d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Koribacteraceae;g__TOLSYN;s__ ANI classification based on placement in class-level tree GCA_903832295.1, s__TOLSYN sp903832295, 95.0, 80.31, 0.46; GCA_903851035.1, s__TOLSYN sp903851035, 95.0, 78.62, 0.4; GCA_003010405.1, s__TOLSYN sp003010405, 95.0, 78.35, 0.28 71.43 11 0.9404257107269934 N/A +bins.22 d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__;s__ N/A N/A N/A N/A N/A N/A N/A N/A N/A N/A d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__;s__ taxonomic novelty determined using RED classification based on placement in class-level tree N/A 61.66 11 0.8379272061383479 N/A +bins.33 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__ATCC43930;f__Stellaceae;g__REEB95;s__ N/A N/A N/A N/A N/A GCA_018971215.1 95.0 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__ATCC43930;f__Stellaceae;g__REEB95;s__REEB95 sp018971215 79.29 0.5 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__ATCC43930;f__Stellaceae;g__REEB95;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree N/A 57.61 11 0.9007596592919825 Genome has more than 12.5% of markers with multiple hits;Genome not assigned to closest species as it falls outside its pre-defined ANI radius +bins.37 d__Bacteria;p__Eremiobacterota;c__Eremiobacteria;o__Baltobacterales;f__Baltobacteraceae;g__JAFAMS01;s__ N/A N/A N/A N/A N/A GCA_019233165.1 95.0 d__Bacteria;p__Eremiobacterota;c__Eremiobacteria;o__Baltobacterales;f__Baltobacteraceae;g__JAFAMS01;s__JAFAMS01 sp019233165 77.35 0.24 
d__Bacteria;p__Eremiobacterota;c__Eremiobacteria;o__Baltobacterales;f__Baltobacteraceae;g__JAFAMS01;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree N/A 73.23 11 0.9116654810893265 N/A +bins.38 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Acidoferrum;s__Acidoferrum sp903970165 GCA_903970165.1 95.0 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Acidoferrum;s__Acidoferrum sp903970165 99.79 0.95 GCA_903970165.1 95.0 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Acidoferrum;s__Acidoferrum sp903970165 99.79 0.95 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Acidoferrum;s__ taxonomic classification defined by topology and ANI topological placement and ANI have congruent species assignments GCA_013289955.1, s__Acidoferrum sp013289955, 95.0, 78.91, 0.48; GCA_013289825.1, s__Acidoferrum sp013289825, 95.0, 78.67, 0.38; GCA_003224075.1, s__Acidoferrum sp003224075, 95.0, 77.6, 0.13; GCA_013289835.1, s__Acidoferrum sp013289835, 95.0, 77.59, 0.21; GCA_013289585.1, s__Acidoferrum sp013289585, 95.0, 77.57, 0.21; GCA_003224525.1, s__Acidoferrum sp003224525, 95.0, 77.49, 0.19; GCA_003224055.1, s__Acidoferrum sp003224055, 95.0, 77.45, 0.21; GCA_018268785.1, s__Acidoferrum sp018268785, 95.0, 77.44, 0.12; GCA_003225315.1, s__Acidoferrum sp003225315, 95.0, 77.23, 0.2; GCA_003223295.1, s__Acidoferrum sp003223295, 95.0, 77.21, 0.17; GCA_003224085.1, s__Acidoferrum sp003224085, 95.0, 77.2, 0.15; GCA_003225295.1, s__Acidoferrum sp003225295, 95.0, 77.15, 0.17; GCA_003154775.1, s__Acidoferrum sp003154775, 95.0, 77.05, 0.12; GCA_002478115.1, s__Acidoferrum typicum, 95.0, 77.04, 0.11; GCA_013289855.1, s__Acidoferrum sp013289855, 95.0, 77.01, 0.16; GCA_003161195.1, s__Acidoferrum sp003161195, 95.0, 76.97, 0.14; GCA_013289665.1, s__Acidoferrum sp013289665, 95.0, 76.93, 0.12; GCA_001917435.1, 
s__Acidoferrum sp001917435, 95.0, 76.93, 0.17; GCA_003224145.1, s__Acidoferrum sp003224145, 95.0, 76.92, 0.13; GCA_001914785.1, s__Acidoferrum sp001914785, 95.0, 76.92, 0.18; GCA_003223255.1, s__Acidoferrum sp003223255, 95.0, 76.86, 0.15; GCA_001919715.1, s__Acidoferrum sp001919715, 95.0, 76.86, 0.09; GCA_003223245.1, s__Acidoferrum sp003223245, 95.0, 76.84, 0.14; GCA_003223215.1, s__Acidoferrum sp003223215, 95.0, 76.82, 0.13; GCA_003224105.1, s__Acidoferrum sp003224105, 95.0, 76.71, 0.12; GCA_019239815.1, s__Acidoferrum sp019239815, 95.0, 76.69, 0.11; GCA_003224135.1, s__Acidoferrum sp003224135, 95.0, 76.42, 0.07 46.54 11 N/A N/A +bins.40 d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__UBA11358;g__UBA11358;s__ N/A N/A N/A N/A N/A GCA_003133815.1 95.0 d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__UBA11358;g__UBA11358;s__UBA11358 sp003133815 81.93 0.55 d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__UBA11358;g__UBA11358;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_003455565.1, s__UBA11358 sp003455565, 95.0, 81.23, 0.48; GCA_003139955.1, s__UBA11358 sp003139955, 95.0, 81.19, 0.53; GCA_903944085.1, s__UBA11358 sp903944085, 95.0, 80.15, 0.57; GCA_903878805.1, s__UBA11358 sp903878805, 95.0, 80.14, 0.46; GCA_903884535.1, s__UBA11358 sp903884535, 95.0, 80.0, 0.52; GCA_903833055.1, s__UBA11358 sp903833055, 95.0, 79.76, 0.44; GCA_903918885.1, s__UBA11358 sp903918885, 95.0, 79.69, 0.46; GCA_903827955.1, s__UBA11358 sp903827955, 95.0, 79.41, 0.5; GCA_903850425.1, s__UBA11358 sp903850425, 95.0, 79.39, 0.51; GCA_903870815.1, s__UBA11358 sp903870815, 95.0, 79.25, 0.42; GCA_903889895.1, s__UBA11358 sp903889895, 95.0, 79.12, 0.37; GCA_903921455.1, s__UBA11358 sp903921455, 95.0, 79.08, 0.45; GCA_903861445.1, s__UBA11358 sp903861445, 95.0, 79.05, 0.38; GCA_903858275.1, s__UBA11358 sp903858275, 95.0, 79.01, 0.4; GCA_903922735.1, 
s__UBA11358 sp903922735, 95.0, 78.81, 0.41; GCA_003151855.1, s__UBA11358 sp003151855, 95.0, 78.75, 0.38; GCA_903822015.1, s__UBA11358 sp903822015, 95.0, 78.7, 0.35; GCA_903917705.1, s__UBA11358 sp903917705, 95.0, 78.47, 0.31; GCA_903847285.1, s__UBA11358 sp903847285, 95.0, 78.4, 0.35; GCA_903865745.1, s__UBA11358 sp903865745, 95.0, 78.29, 0.37; GCA_903835255.1, s__UBA11358 sp903835255, 95.0, 78.21, 0.34; GCA_903911825.1, s__UBA11358 sp903911825, 95.0, 78.0, 0.3; GCA_903842235.1, s__UBA11358 sp903842235, 95.0, 77.95, 0.34; GCA_903888765.1, s__UBA11358 sp903888765, 95.0, 77.34, 0.19; GCA_903820285.1, s__UBA11358 sp903820285, 95.0, 77.3, 0.25; GCA_903872345.1, s__UBA11358 sp903872345, 95.0, 77.16, 0.31; GCA_903895255.1, s__UBA11358 sp903895255, 95.0, 76.88, 0.23; GCA_903936645.1, s__UBA11358 sp903936645, 95.0, 76.55, 0.15; GCA_903875465.1, s__UBA11358 sp903875465, 95.0, 76.44, 0.16 84.85 11 0.9423589220805788 Genome not assigned to closest species as it falls outside its pre-defined ANI radius +bins.42 d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__UBA11358;g__UBA7542;s__ N/A N/A N/A N/A N/A N/A N/A N/A N/A N/A d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__UBA11358;g__UBA7542;s__ taxonomic novelty determined using RED classification based on placement in class-level tree N/A 44.1 11 0.9096785979623646 N/A +bins.43 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Steroidobacterales;f__Steroidobacteraceae;g__13-2-20CM-66-19;s__ N/A N/A N/A N/A N/A GCA_018241425.1 95.0 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Steroidobacterales;f__Steroidobacteraceae;g__13-2-20CM-66-19;s__13-2-20CM-66-19 sp018241425 82.58 0.73 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Steroidobacterales;f__Steroidobacteraceae;g__13-2-20CM-66-19;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_018240525.1, s__13-2-20CM-66-19 sp018240525, 95.0, 81.57, 
0.63; GCA_005878835.1, s__13-2-20CM-66-19 sp005878835, 95.0, 80.36, 0.5; GCA_005878095.1, s__13-2-20CM-66-19 sp005878095, 95.0, 80.28, 0.47; GCA_001914695.1, s__13-2-20CM-66-19 sp001914695, 95.0, 80.25, 0.43; GCA_019247195.1, s__13-2-20CM-66-19 sp019247195, 95.0, 80.2, 0.45; GCA_005877965.1, s__13-2-20CM-66-19 sp005877965, 95.0, 80.01, 0.41; GCA_019235245.1, s__13-2-20CM-66-19 sp019235245, 95.0, 79.85, 0.43; GCA_018241445.1, s__13-2-20CM-66-19 sp018241445, 95.0, 79.64, 0.45; GCA_019242385.1, s__13-2-20CM-66-19 sp019242385, 95.0, 79.44, 0.44; GCA_003136935.1, s__13-2-20CM-66-19 sp003136935, 95.0, 79.43, 0.43; GCA_003156695.1, s__13-2-20CM-66-19 sp003156695, 95.0, 78.98, 0.34; GCA_005877705.1, s__13-2-20CM-66-19 sp005877705, 95.0, 78.18, 0.26; GCA_018241505.1, s__13-2-20CM-66-19 sp018241505, 95.0, 77.02, 0.18 60.54 11 0.9759376136112458 Genome not assigned to closest species as it falls outside its pre-defined ANI radius +bins.7 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae;g__Terracidiphilus;s__ N/A N/A N/A N/A N/A GCA_003165005.1 95.0 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae;g__Terracidiphilus;s__Terracidiphilus sp003165005 80.68 0.48 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae;g__Terracidiphilus;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_003171315.1, s__Terracidiphilus sp003171315, 95.0, 78.84, 0.31; GCA_003138515.1, s__Terracidiphilus sp003138515, 95.0, 78.73, 0.38; GCF_900290245.1, s__Terracidiphilus gaucii, 95.0, 78.39, 0.36; GCA_003134815.1, s__Terracidiphilus sp003134815, 95.0, 78.09, 0.32; GCA_003139795.1, s__Terracidiphilus sp003139795, 95.0, 77.73, 0.23; GCA_003170825.1, s__Terracidiphilus sp003170825, 95.0, 77.65, 0.22; GCA_003134855.1, s__Terracidiphilus sp003134855, 95.0, 77.51, 0.2; GCA_003159355.1, s__Terracidiphilus sp003159355, 95.0, 77.48, 
0.17; GCA_003142935.1, s__Terracidiphilus sp003142935, 95.0, 77.47, 0.23; GCA_002314435.1, s__Terracidiphilus sp002314435, 95.0, 77.46, 0.16; GCA_903849645.1, s__Terracidiphilus sp903849645, 95.0, 77.44, 0.21; GCA_015654835.1, s__Terracidiphilus sp015654835, 95.0, 77.44, 0.15; GCA_003165095.1, s__Terracidiphilus sp003165095, 95.0, 77.41, 0.22; GCA_000620725.1, s__Terracidiphilus sp000620725, 95.0, 77.34, 0.16; GCA_003138365.1, s__Terracidiphilus sp003138365, 95.0, 77.32, 0.22; GCA_018268915.1, s__Terracidiphilus sp018268915, 95.0, 77.31, 0.1; GCA_015655195.1, s__Terracidiphilus sp015655195, 95.0, 77.26, 0.14; GCA_003161045.1, s__Terracidiphilus sp003161045, 95.0, 77.26, 0.16; GCA_003162495.1, s__Terracidiphilus sp003162495, 95.0, 77.22, 0.12; GCA_903828675.1, s__Terracidiphilus sp903828675, 95.0, 77.16, 0.19; GCA_002307235.1, s__Terracidiphilus sp002307235, 95.0, 77.14, 0.15; GCA_903842065.1, s__Terracidiphilus sp903842065, 95.0, 77.12, 0.16; GCA_003133425.1, s__Terracidiphilus sp003133425, 95.0, 77.06, 0.21; GCA_903911925.1, s__Terracidiphilus sp903911925, 95.0, 77.05, 0.19; GCA_008682415.1, s__Terracidiphilus sp008682415, 95.0, 77.05, 0.15; GCA_003151435.1, s__Terracidiphilus sp003151435, 95.0, 77.04, 0.19; GCA_003165075.1, s__Terracidiphilus sp003165075, 95.0, 77.03, 0.18; GCF_003131205.1, s__Terracidiphilus savannae, 95.0, 76.98, 0.14; GCA_003165935.1, s__Terracidiphilus sp003165935, 95.0, 76.97, 0.12; GCA_003140705.1, s__Terracidiphilus sp003140705, 95.0, 76.93, 0.13; GCA_003142015.1, s__Terracidiphilus sp003142015, 95.0, 76.89, 0.17; GCA_015655365.1, s__Terracidiphilus sp015655365, 95.0, 76.86, 0.15; GCA_003156495.1, s__Terracidiphilus sp003156495, 95.0, 76.86, 0.15; GCA_003140785.1, s__Terracidiphilus sp003140785, 95.0, 76.86, 0.12; GCA_003167305.1, s__Terracidiphilus sp003167305, 95.0, 76.85, 0.17; GCA_003156235.1, s__Terracidiphilus sp003156235, 95.0, 76.83, 0.14; GCA_003151985.1, s__Terracidiphilus sp003151985, 95.0, 76.78, 0.12; GCF_001449115.1, 
s__Terracidiphilus gabretensis, 95.0, 76.73, 0.12; GCA_003166055.1, s__Terracidiphilus sp003166055, 95.0, 76.71, 0.17; GCA_017883405.1, s__Terracidiphilus sp017883405, 95.0, 76.68, 0.14; GCA_003164635.1, s__Terracidiphilus sp003164635, 95.0, 76.43, 0.12 43.96 11 0.9708336452632881 N/A diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf new file mode 100644 index 00000000..b58b2402 Binary files /dev/null and b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf differ diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip new file mode 100644 index 00000000..a5bcf346 Binary files /dev/null and b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip differ diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_ko_matrix.txt b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_ko_matrix.txt new file mode 100644 index 00000000..b58b2402 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_ko_matrix.txt @@ -0,0 +1 @@ +No KO analysis result for nmdc:wfmag-12-fxwdrv82.1 diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_kronaplot.html b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_kronaplot.html new file mode 100644 index 00000000..b58b2402 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_kronaplot.html @@ -0,0 +1 @@ +No KO analysis result for nmdc:wfmag-12-fxwdrv82.1 diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip new file mode 100644 index 00000000..2db16278 Binary files /dev/null and b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip differ diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_mags_stats.json 
b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_mags_stats.json new file mode 100644 index 00000000..2a326515 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_mags_stats.json @@ -0,0 +1,314 @@ +{ + "input_contig_num": 2273412, + "too_short_contig_num": 2005162, + "low_depth_contig_num": 0, + "unbinned_contig_num": 241036, + "binned_contig_num": 27214, + "mags_list": [ + { + "bin_name": "bins.40", + "eukaryotic_evaluation": { + "completeness": 17.71, + "contamination": 8.82, + "ncbi_lineage_tax_ids": "1-131567-2759-2611352-33682-191814-2603949", + "ncbi_lineage": "root,cellular organisms,Eukaryota,Discoba,Euglenozoa,Diplonemea,Diplonemidae" + }, + "number_of_contig": 44, + "completeness": 97.3, + "contamination": 3.38, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "MQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "Bacteria", + "gtdbtk_phylum": "Verrucomicrobiota", + "gtdbtk_class": "Verrucomicrobiae", + "gtdbtk_order": "Pedosphaerales", + "gtdbtk_family": "UBA11358", + "gtdbtk_genus": "UBA11358", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7_c1", + "nmdc:wfmgas-13-56028x05.1_9_c1", + "nmdc:wfmgas-13-56028x05.1_16_c1", + "nmdc:wfmgas-13-56028x05.1_20_c1", + "nmdc:wfmgas-13-56028x05.1_23_c1", + "nmdc:wfmgas-13-56028x05.1_27_c1", + "nmdc:wfmgas-13-56028x05.1_45_c1", + "nmdc:wfmgas-13-56028x05.1_55_c1", + "nmdc:wfmgas-13-56028x05.1_71_c1", + "nmdc:wfmgas-13-56028x05.1_79_c1", + "nmdc:wfmgas-13-56028x05.1_99_c1", + "nmdc:wfmgas-13-56028x05.1_52_c2", + "nmdc:wfmgas-13-56028x05.1_127_c1", + "nmdc:wfmgas-13-56028x05.1_131_c1", + "nmdc:wfmgas-13-56028x05.1_137_c1", + "nmdc:wfmgas-13-56028x05.1_169_c1", + "nmdc:wfmgas-13-56028x05.1_200_c1", + "nmdc:wfmgas-13-56028x05.1_212_c1", + "nmdc:wfmgas-13-56028x05.1_223_c1", + "nmdc:wfmgas-13-56028x05.1_372_c1", + "nmdc:wfmgas-13-56028x05.1_393_c1", + "nmdc:wfmgas-13-56028x05.1_428_c1", + 
"nmdc:wfmgas-13-56028x05.1_52_c1", + "nmdc:wfmgas-13-56028x05.1_582_c1", + "nmdc:wfmgas-13-56028x05.1_706_c1", + "nmdc:wfmgas-13-56028x05.1_888_c1", + "nmdc:wfmgas-13-56028x05.1_912_c1", + "nmdc:wfmgas-13-56028x05.1_1268_c1", + "nmdc:wfmgas-13-56028x05.1_1271_c1", + "nmdc:wfmgas-13-56028x05.1_1492_c1", + "nmdc:wfmgas-13-56028x05.1_1494_c1", + "nmdc:wfmgas-13-56028x05.1_1604_c1", + "nmdc:wfmgas-13-56028x05.1_1627_c1", + "nmdc:wfmgas-13-56028x05.1_1888_c1", + "nmdc:wfmgas-13-56028x05.1_1938_c1", + "nmdc:wfmgas-13-56028x05.1_2944_c1", + "nmdc:wfmgas-13-56028x05.1_3261_c1", + "nmdc:wfmgas-13-56028x05.1_3477_c1", + "nmdc:wfmgas-13-56028x05.1_4194_c1", + "nmdc:wfmgas-13-56028x05.1_6257_c1", + "nmdc:wfmgas-13-56028x05.1_7589_c1", + "nmdc:wfmgas-13-56028x05.1_10469_c1", + "nmdc:wfmgas-13-56028x05.1_10553_c1", + "nmdc:wfmgas-13-56028x05.1_13792_c1" + ] + }, + { + "bin_name": "bins.20", + "eukaryotic_evaluation": { + "completeness": 17.71, + "contamination": 8.82, + "ncbi_lineage_tax_ids": "1-131567-2759-2611352-33682-191814-2603949", + "ncbi_lineage": "root,cellular organisms,Eukaryota,Discoba,Euglenozoa,Diplonemea,Diplonemidae" + }, + "number_of_contig": 85, + "completeness": 95.68, + "contamination": 0.0, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "MQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "Bacteria", + "gtdbtk_phylum": "Proteobacteria", + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_order": "Burkholderiales", + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_genus": "GJ-E10", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_2_c1", + "nmdc:wfmgas-13-56028x05.1_8_c1", + "nmdc:wfmgas-13-56028x05.1_13_c1", + "nmdc:wfmgas-13-56028x05.1_19_c1", + "nmdc:wfmgas-13-56028x05.1_21_c1", + "nmdc:wfmgas-13-56028x05.1_26_c1", + "nmdc:wfmgas-13-56028x05.1_32_c1", + "nmdc:wfmgas-13-56028x05.1_37_c2", + "nmdc:wfmgas-13-56028x05.1_46_c1", + "nmdc:wfmgas-13-56028x05.1_47_c1", + 
"nmdc:wfmgas-13-56028x05.1_57_c1", + "nmdc:wfmgas-13-56028x05.1_76_c1", + "nmdc:wfmgas-13-56028x05.1_77_c1", + "nmdc:wfmgas-13-56028x05.1_84_c1", + "nmdc:wfmgas-13-56028x05.1_97_c1", + "nmdc:wfmgas-13-56028x05.1_117_c1", + "nmdc:wfmgas-13-56028x05.1_139_c1", + "nmdc:wfmgas-13-56028x05.1_141_c1", + "nmdc:wfmgas-13-56028x05.1_142_c1", + "nmdc:wfmgas-13-56028x05.1_160_c1", + "nmdc:wfmgas-13-56028x05.1_174_c1", + "nmdc:wfmgas-13-56028x05.1_182_c1", + "nmdc:wfmgas-13-56028x05.1_226_c1", + "nmdc:wfmgas-13-56028x05.1_289_c1", + "nmdc:wfmgas-13-56028x05.1_310_c1", + "nmdc:wfmgas-13-56028x05.1_329_c1", + "nmdc:wfmgas-13-56028x05.1_337_c1", + "nmdc:wfmgas-13-56028x05.1_373_c1", + "nmdc:wfmgas-13-56028x05.1_403_c1", + "nmdc:wfmgas-13-56028x05.1_410_c1", + "nmdc:wfmgas-13-56028x05.1_500_c1", + "nmdc:wfmgas-13-56028x05.1_540_c1", + "nmdc:wfmgas-13-56028x05.1_858_c1", + "nmdc:wfmgas-13-56028x05.1_1483_c1", + "nmdc:wfmgas-13-56028x05.1_1651_c1", + "nmdc:wfmgas-13-56028x05.1_1687_c1", + "nmdc:wfmgas-13-56028x05.1_1795_c1", + "nmdc:wfmgas-13-56028x05.1_1895_c1", + "nmdc:wfmgas-13-56028x05.1_2633_c1", + "nmdc:wfmgas-13-56028x05.1_2744_c1", + "nmdc:wfmgas-13-56028x05.1_3220_c1", + "nmdc:wfmgas-13-56028x05.1_3379_c1", + "nmdc:wfmgas-13-56028x05.1_3550_c1", + "nmdc:wfmgas-13-56028x05.1_3617_c1", + "nmdc:wfmgas-13-56028x05.1_3777_c1", + "nmdc:wfmgas-13-56028x05.1_1362_c2", + "nmdc:wfmgas-13-56028x05.1_3095_c2", + "nmdc:wfmgas-13-56028x05.1_5744_c1", + "nmdc:wfmgas-13-56028x05.1_5821_c1", + "nmdc:wfmgas-13-56028x05.1_6375_c1", + "nmdc:wfmgas-13-56028x05.1_6917_c1", + "nmdc:wfmgas-13-56028x05.1_7031_c1", + "nmdc:wfmgas-13-56028x05.1_7036_c1", + "nmdc:wfmgas-13-56028x05.1_7348_c1", + "nmdc:wfmgas-13-56028x05.1_9420_c1", + "nmdc:wfmgas-13-56028x05.1_9747_c1", + "nmdc:wfmgas-13-56028x05.1_1362_c3", + "nmdc:wfmgas-13-56028x05.1_9963_c1", + "nmdc:wfmgas-13-56028x05.1_10118_c1", + "nmdc:wfmgas-13-56028x05.1_10474_c1", + "nmdc:wfmgas-13-56028x05.1_10640_c1", + 
"nmdc:wfmgas-13-56028x05.1_10939_c1", + "nmdc:wfmgas-13-56028x05.1_11018_c1", + "nmdc:wfmgas-13-56028x05.1_11125_c1", + "nmdc:wfmgas-13-56028x05.1_11736_c1", + "nmdc:wfmgas-13-56028x05.1_13326_c1", + "nmdc:wfmgas-13-56028x05.1_13428_c1", + "nmdc:wfmgas-13-56028x05.1_13828_c1", + "nmdc:wfmgas-13-56028x05.1_37_c1", + "nmdc:wfmgas-13-56028x05.1_14715_c1", + "nmdc:wfmgas-13-56028x05.1_15174_c1", + "nmdc:wfmgas-13-56028x05.1_17767_c1", + "nmdc:wfmgas-13-56028x05.1_17928_c1", + "nmdc:wfmgas-13-56028x05.1_18037_c1", + "nmdc:wfmgas-13-56028x05.1_18887_c1", + "nmdc:wfmgas-13-56028x05.1_20885_c1", + "nmdc:wfmgas-13-56028x05.1_21369_c1", + "nmdc:wfmgas-13-56028x05.1_21783_c1", + "nmdc:wfmgas-13-56028x05.1_24945_c1", + "nmdc:wfmgas-13-56028x05.1_25148_c1", + "nmdc:wfmgas-13-56028x05.1_26641_c1", + "nmdc:wfmgas-13-56028x05.1_29321_c1", + "nmdc:wfmgas-13-56028x05.1_3095_c1", + "nmdc:wfmgas-13-56028x05.1_32796_c1", + "nmdc:wfmgas-13-56028x05.1_34090_c1" + ] + }, + { + "bin_name": "bins.9", + "eukaryotic_evaluation": { + "completeness": 17.71, + "contamination": 8.82, + "ncbi_lineage_tax_ids": "1-131567-2759-2611352-33682-191814-2603949", + "ncbi_lineage": "root,cellular organisms,Eukaryota,Discoba,Euglenozoa,Diplonemea,Diplonemidae" + }, + "number_of_contig": 92, + "completeness": 0.0, + "contamination": 0.0, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "LQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "null", + "gtdbtk_phylum": "null", + "gtdbtk_class": "null", + "gtdbtk_order": "null", + "gtdbtk_family": "null", + "gtdbtk_genus": "null", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7094_c1", + "nmdc:wfmgas-13-56028x05.1_9486_c1", + "nmdc:wfmgas-13-56028x05.1_9853_c1", + "nmdc:wfmgas-13-56028x05.1_10857_c1", + "nmdc:wfmgas-13-56028x05.1_11702_c1", + "nmdc:wfmgas-13-56028x05.1_12042_c1", + "nmdc:wfmgas-13-56028x05.1_14174_c1", + "nmdc:wfmgas-13-56028x05.1_14597_c1", + 
"nmdc:wfmgas-13-56028x05.1_16115_c1", + "nmdc:wfmgas-13-56028x05.1_16261_c1", + "nmdc:wfmgas-13-56028x05.1_16795_c1", + "nmdc:wfmgas-13-56028x05.1_16943_c1", + "nmdc:wfmgas-13-56028x05.1_17208_c1", + "nmdc:wfmgas-13-56028x05.1_17245_c1", + "nmdc:wfmgas-13-56028x05.1_17383_c1", + "nmdc:wfmgas-13-56028x05.1_17783_c1", + "nmdc:wfmgas-13-56028x05.1_18468_c1", + "nmdc:wfmgas-13-56028x05.1_18553_c1", + "nmdc:wfmgas-13-56028x05.1_18858_c1", + "nmdc:wfmgas-13-56028x05.1_19302_c1", + "nmdc:wfmgas-13-56028x05.1_19824_c1", + "nmdc:wfmgas-13-56028x05.1_20316_c1", + "nmdc:wfmgas-13-56028x05.1_20787_c1", + "nmdc:wfmgas-13-56028x05.1_21029_c1", + "nmdc:wfmgas-13-56028x05.1_21435_c1", + "nmdc:wfmgas-13-56028x05.1_21475_c1", + "nmdc:wfmgas-13-56028x05.1_21484_c1", + "nmdc:wfmgas-13-56028x05.1_21518_c1", + "nmdc:wfmgas-13-56028x05.1_21685_c1", + "nmdc:wfmgas-13-56028x05.1_21809_c1", + "nmdc:wfmgas-13-56028x05.1_21924_c1", + "nmdc:wfmgas-13-56028x05.1_21958_c1", + "nmdc:wfmgas-13-56028x05.1_22186_c1", + "nmdc:wfmgas-13-56028x05.1_22271_c1", + "nmdc:wfmgas-13-56028x05.1_22516_c1", + "nmdc:wfmgas-13-56028x05.1_22514_c1", + "nmdc:wfmgas-13-56028x05.1_22777_c1", + "nmdc:wfmgas-13-56028x05.1_23003_c1", + "nmdc:wfmgas-13-56028x05.1_23115_c1", + "nmdc:wfmgas-13-56028x05.1_23204_c1", + "nmdc:wfmgas-13-56028x05.1_23239_c1", + "nmdc:wfmgas-13-56028x05.1_23352_c1", + "nmdc:wfmgas-13-56028x05.1_23445_c1", + "nmdc:wfmgas-13-56028x05.1_23505_c1", + "nmdc:wfmgas-13-56028x05.1_23571_c1", + "nmdc:wfmgas-13-56028x05.1_24047_c1", + "nmdc:wfmgas-13-56028x05.1_24749_c1", + "nmdc:wfmgas-13-56028x05.1_24981_c1", + "nmdc:wfmgas-13-56028x05.1_25059_c1", + "nmdc:wfmgas-13-56028x05.1_25526_c1", + "nmdc:wfmgas-13-56028x05.1_26162_c1", + "nmdc:wfmgas-13-56028x05.1_26376_c1", + "nmdc:wfmgas-13-56028x05.1_26773_c1", + "nmdc:wfmgas-13-56028x05.1_26816_c1", + "nmdc:wfmgas-13-56028x05.1_26891_c1", + "nmdc:wfmgas-13-56028x05.1_27179_c1", + "nmdc:wfmgas-13-56028x05.1_27272_c1", + "nmdc:wfmgas-13-56028x05.1_27358_c1", + 
"nmdc:wfmgas-13-56028x05.1_27411_c1", + "nmdc:wfmgas-13-56028x05.1_27550_c1", + "nmdc:wfmgas-13-56028x05.1_28892_c1", + "nmdc:wfmgas-13-56028x05.1_29003_c1", + "nmdc:wfmgas-13-56028x05.1_29238_c1", + "nmdc:wfmgas-13-56028x05.1_29324_c1", + "nmdc:wfmgas-13-56028x05.1_29771_c1", + "nmdc:wfmgas-13-56028x05.1_29878_c1", + "nmdc:wfmgas-13-56028x05.1_30248_c1", + "nmdc:wfmgas-13-56028x05.1_30476_c1", + "nmdc:wfmgas-13-56028x05.1_30587_c1", + "nmdc:wfmgas-13-56028x05.1_31160_c1", + "nmdc:wfmgas-13-56028x05.1_31834_c1", + "nmdc:wfmgas-13-56028x05.1_31922_c1", + "nmdc:wfmgas-13-56028x05.1_31971_c1", + "nmdc:wfmgas-13-56028x05.1_32244_c1", + "nmdc:wfmgas-13-56028x05.1_32605_c1", + "nmdc:wfmgas-13-56028x05.1_32623_c1", + "nmdc:wfmgas-13-56028x05.1_32832_c1", + "nmdc:wfmgas-13-56028x05.1_33068_c1", + "nmdc:wfmgas-13-56028x05.1_33334_c1", + "nmdc:wfmgas-13-56028x05.1_33438_c1", + "nmdc:wfmgas-13-56028x05.1_33855_c1", + "nmdc:wfmgas-13-56028x05.1_34035_c1", + "nmdc:wfmgas-13-56028x05.1_34120_c1", + "nmdc:wfmgas-13-56028x05.1_34140_c1", + "nmdc:wfmgas-13-56028x05.1_34133_c1", + "nmdc:wfmgas-13-56028x05.1_34177_c1", + "nmdc:wfmgas-13-56028x05.1_34481_c1", + "nmdc:wfmgas-13-56028x05.1_34728_c1", + "nmdc:wfmgas-13-56028x05.1_34843_c1", + "nmdc:wfmgas-13-56028x05.1_35665_c1", + "nmdc:wfmgas-13-56028x05.1_35772_c1", + "nmdc:wfmgas-13-56028x05.1_35995_c1" + ] + } + ] +} \ No newline at end of file diff --git a/tests/test_sched.py b/tests/test_sched.py index 8a3c5034..6099999e 100644 --- a/tests/test_sched.py +++ b/tests/test_sched.py @@ -1,41 +1,15 @@ -from nmdc_automation.workflow_automation.sched import Scheduler -from pytest import fixture, mark -from pathlib import Path -from time import time +from nmdc_automation.workflow_automation.sched import Scheduler, SchedulerJob +from pytest import mark -from tests.fixtures.db_utils import init_test, load_fixture, read_json, reset_db - -TRIGGER_SET = 'metagenome_annotation_activity_set' -TRIGGER_ID = 'nmdc:55a79b5dd58771e28686665e3c3faa0c' 
-TRIGGER_DOID = 'nmdc:1d87115c442a1f83190ae47c7fe4011f' - -def mock_progress(test_db, wf, version=None, flush=True, idx=0): - """ - This function will mock the progress of a workflow. It reads - from a fixture file and inserts one record into the database. - By default, the record will be taken from the first record - in the fixture. You can change the record by changing the - idx parameter. - """ - s = wf.collection - data = read_json("%s.json" % (s))[idx] - - if version: - data['version'] = version - else: - data['version'] = wf.version - data['git_url'] = wf.git_repo - if flush: - test_db[s].delete_many({}) - test_db[s].insert_one(data) +from tests.fixtures.db_utils import init_test, load_fixture, read_json, reset_db @mark.parametrize("workflow_file", [ "workflows.yaml", "workflows-mt.yaml" ]) -def test_scheduler_cycle(test_db, mock_api, workflow_file, workflows_config_dir, site_config): +def test_scheduler_cycle(test_db, mock_api, workflow_file, workflows_config_dir, site_config_file): """ Test basic job creation. 
""" @@ -47,13 +21,13 @@ def test_scheduler_cycle(test_db, mock_api, workflow_file, workflows_config_dir, reset_db(test_db) load_fixture(test_db, "data_object_set.json") - load_fixture(test_db, "omics_processing_set.json") + load_fixture(test_db, "data_generation_set.json") # Scheduler will find one job to create exp_num_jobs_initial = 1 exp_num_jobs_cycle_1 = 0 jm = Scheduler(test_db, wfn=workflows_config_dir / workflow_file, - site_conf=site_config) + site_conf=site_config_file) resp = jm.cycle() assert len(resp) == exp_num_jobs_initial assert resp[0]["config"]["git_repo"] in exp_rqc_git_repos @@ -66,17 +40,18 @@ def test_scheduler_cycle(test_db, mock_api, workflow_file, workflows_config_dir, "workflows.yaml", "workflows-mt.yaml" ]) -def test_progress(test_db, mock_api, workflow_file, workflows_config_dir, site_config): +def test_progress(test_db, mock_api, workflow_file, workflows_config_dir, site_config_file): reset_db(test_db) metatranscriptome = False if workflow_file == "workflows-mt.yaml": metatranscriptome = True load_fixture(test_db, "data_object_set.json") - load_fixture(test_db, "omics_processing_set.json") + load_fixture(test_db, "data_generation_set.json") + jm = Scheduler(test_db, wfn=workflows_config_dir / workflow_file, - site_conf= site_config) + site_conf= site_config_file) workflow_by_name = dict() for wf in jm.workflows: workflow_by_name[wf.name] = wf @@ -85,86 +60,75 @@ def test_progress(test_db, mock_api, workflow_file, workflows_config_dir, site_c resp = jm.cycle() assert len(resp) == 1 - if metatranscriptome: - wf = workflow_by_name['Metatranscriptome Reads QC Interleave'] - mock_progress(test_db, wf, idx=1) - else: - wf = workflow_by_name['Reads QC Interleave'] - mock_progress(test_db, wf) + # We simulate the RQC job finishing + load_fixture(test_db, "read_qc_analysis.json", col="workflow_execution_set") resp = jm.cycle() if metatranscriptome: # assembly exp_num_post_rqc_jobs = 1 + exp_num_post_annotation_jobs = 1 else: # assembly, rba 
exp_num_post_rqc_jobs = 2 + exp_num_post_annotation_jobs = 2 assert len(resp) == exp_num_post_rqc_jobs - if metatranscriptome: - wf = workflow_by_name['Metatranscriptome Assembly'] - mock_progress(test_db, wf, version="v0.0.1") + # simulate assembly job finishing + load_fixture(test_db, "metatranscriptome_assembly.json", col="workflow_execution_set") # We should see a metatranscriptome annotation job resp = jm.cycle() assert len(resp) == 1 - assert resp[0]["config"]["activity"]["type"] in [ - "nmdc:MetatranscriptomeAnnotation", - "nmdc:MetatranscriptomeAnnotationActivity" - ] - # We should have a job record for this now + assert resp[0]["config"]["activity"]["type"] == "nmdc:MetatranscriptomeAnnotation" + resp = jm.cycle() + # all jobs should be in a submitted state assert len(resp) == 0 + # simulate annotation job finishing + load_fixture(test_db, "metatranscriptome_annotation.json", col="workflow_execution_set") + resp = jm.cycle() + assert len(resp) == 1 + assert resp[0]["config"]["activity"]["type"] == "nmdc:MetatranscriptomeExpressionAnalysis" else: - # Let's override the version to simulate an older run - # for this workflow that is stil within range of the - # current workflow - wf = workflow_by_name['Metagenome Assembly'] - # TODO: Need to make this test not depend on a hardcoded version - mock_progress(test_db, wf, version="v1.0.2") + # simulate assembly job finishing + load_fixture(test_db, "metagenome_assembly.json", col="workflow_execution_set") # We should see a metagenome annotation job resp = jm.cycle() assert len(resp) == 1 - assert resp[0]["config"]["activity"]["type"] in [ - "nmdc:MetagenomeAnnotation", - "nmdc:MetagenomeAnnotationActivity" - ] - # We should have a job record for this now + assert resp[0]["config"]["activity"]["type"] == "nmdc:MetagenomeAnnotation" + resp = jm.cycle() + # all jobs should be in a submitted state assert len(resp) == 0 - # Simulate Annotation job finishing - wf = workflow_by_name['Metagenome Annotation'] - 
mock_progress(test_db, wf) - # We should see a MAGs job + + # simulate annotation job finishing + load_fixture(test_db, "metagenome_annotation.json", col="workflow_execution_set") resp = jm.cycle() assert len(resp) == 1 - assert resp[0]["config"]["activity"]["type"] in [ - "nmdc:MagsAnalysis", - "nmdc:MagsAnalysisActivity" - ] - # We should have job records for everything now - resp = jm.cycle() - assert len(resp) == 0 + assert resp[0]["config"]["activity"]["type"] == "nmdc:MagsAnalysis" - # Let's remove the job records. - # Since we don't have activity records for - # MAGS or RBA, we should see two new jobs - test_db.jobs.delete_many({}) - resp = jm.cycle() - assert len(resp) == 2 + resp = jm.cycle() + # all jobs should be in a submitted state + assert len(resp) == 0 + + # Let's remove the job records. + test_db.jobs.delete_many({}) + resp = jm.cycle() + assert len(resp) == exp_num_post_annotation_jobs -def test_multiple_versions(test_db, mock_api, workflows_config_dir, site_config): +def test_multiple_versions(test_db, mock_api, workflows_config_dir, site_config_file): init_test(test_db) reset_db(test_db) test_db.jobs.delete_many({}) load_fixture(test_db, "data_object_set.json") - load_fixture(test_db, "omics_processing_set.json") + load_fixture(test_db, "data_generation_set.json") jm = Scheduler(test_db, wfn=workflows_config_dir / "workflows.yaml", - site_conf=site_config) + site_conf=site_config_file) workflow_by_name = dict() for wf in jm.workflows: workflow_by_name[wf.name] = wf @@ -174,49 +138,35 @@ def test_multiple_versions(test_db, mock_api, workflows_config_dir, site_config) # # We simulate one of the jobs finishing - wf = workflow_by_name['Reads QC'] - mock_progress(test_db, wf) + load_fixture(test_db, "read_qc_analysis.json", col="workflow_execution_set") resp = jm.cycle() # We should see one asm and one rba job assert len(resp) == 2 resp = jm.cycle() assert len(resp) == 0 - # Now simulate one of the other jobs finishing - load_fixture(test_db, 
"data_object_set2.json", col="data_object_set") - load_fixture(test_db, "read_qc_analysis_activity_set2.json", - col="read_qc_analysis_activity_set") - resp = jm.cycle() - # We should see one asm and one rba job - exp_post_rqc_types = ["nmdc:MetagenomeAssembly", "nmdc:ReadBasedTaxonomyAnalysisActivity"] - post_rqc_types = [j["config"]["activity"]["type"] for j in resp] - assert sorted(post_rqc_types) == sorted(exp_post_rqc_types) - assert len(resp) == 2 - resp = jm.cycle() + # Simulate the assembly job finishing with an older version + load_fixture(test_db, "metagenome_assembly.json", col="workflow_execution_set", version="v1.0.2") - # Empty the job queue. We should see 4 jobs - test_db.jobs.delete_many({}) resp = jm.cycle() - assert len(resp) == 4 + # We should see one rba job + assert len(resp) == 1 + resp = jm.cycle() + assert len(resp) == 0 -def test_out_of_range(test_db, mock_api, workflows_config_dir, site_config): +def test_out_of_range(test_db, mock_api, workflows_config_dir, site_config_file): init_test(test_db) reset_db(test_db) test_db.jobs.delete_many({}) load_fixture(test_db, "data_object_set.json") - load_fixture(test_db, "omics_processing_set.json") + load_fixture(test_db, "data_generation_set.json") jm = Scheduler(test_db, wfn=workflows_config_dir / "workflows.yaml", - site_conf=site_config) - workflow_by_name = dict() - for wf in jm.workflows: - workflow_by_name[wf.name] = wf - + site_conf=site_config_file) # Let's create two RQC records. One will be in range # and the other will not. We should only get new jobs # for the one in range. 
- wf = workflow_by_name['Reads QC'] - mock_progress(test_db, wf) - mock_progress(test_db, wf, version="v0.0.1", flush=False) + load_fixture(test_db, "read_qc_analysis.json", col="workflow_execution_set") + load_fixture(test_db, "read_qc_analysis.json", col="workflow_execution_set", version="v0.0.1") resp = jm.cycle() # there is one additional metatronscriptome rqc job from the fixture @@ -224,37 +174,47 @@ def test_out_of_range(test_db, mock_api, workflows_config_dir, site_config): resp = jm.cycle() assert len(resp) == 0 -def test_type_resolving(test_db, mock_api, workflows_config_dir, site_config): +def test_type_resolving(test_db, mock_api, workflows_config_dir, site_config_file): """ This tests the handling when the same type is used for different activity types. The desired behavior is to use the first match. """ - - init_test(test_db) reset_db(test_db) - test_db.jobs.delete_many({}) load_fixture(test_db, "data_object_set.json") - load_fixture(test_db, "omics_processing_set.json") - load_fixture(test_db, "read_qc_analysis_activity_set.json") + load_fixture(test_db, "data_generation_set.json") + load_fixture(test_db, "read_qc_analysis.json", col="workflow_execution_set") jm = Scheduler(test_db, wfn=workflows_config_dir / "workflows.yaml", - site_conf=site_config) + site_conf=site_config_file) workflow_by_name = dict() for wf in jm.workflows: workflow_by_name[wf.name] = wf - wf = workflow_by_name['Metagenome Assembly'] - mock_progress(test_db, wf) - wf = workflow_by_name['Metagenome Annotation'] - mock_progress(test_db, wf) + # mock progress + load_fixture(test_db, "metagenome_assembly.json", col="workflow_execution_set") + load_fixture(test_db, "metagenome_annotation.json", col="workflow_execution_set") resp = jm.cycle() - # TODO: This is retruning 4 instead of 2. 
Need to investigate - # Returns: - # Readbased Analysis v1.0.5 for metagenome - # Readbased Analysis v1.0.5 for metatranscriptome - # Metagenome Assembly for metatranscriptome - # MAGs Analysis for metagenome - # assert len(resp) == 2 + + assert len(resp) == 2 + # assert 'annotation' in resp[1]['config']['inputs']['contig_file'] + + +@mark.parametrize("workflow_file", [ + "workflows.yaml", + "workflows-mt.yaml" +]) +def test_scheduler_add_job_rec(test_db, mock_api, workflow_file, workflows_config_dir, site_config_file): + """ + Test basic job creation. + """ + reset_db(test_db) + load_fixture(test_db, "data_object_set.json") + load_fixture(test_db, "data_generation_set.json") + + jm = Scheduler(test_db, wfn=workflows_config_dir / workflow_file, + site_conf=site_config_file) + # sanity check + assert jm diff --git a/tests/test_watch_nmdc.py b/tests/test_watch_nmdc.py index c36fc5f3..98ea78a5 100644 --- a/tests/test_watch_nmdc.py +++ b/tests/test_watch_nmdc.py @@ -1,92 +1,379 @@ -from nmdc_automation.workflow_automation.watch_nmdc import Watcher -import os +import copy import json -import shutil +from pathlib import PosixPath, Path + +import pytest from pytest import fixture +from unittest import mock +import requests_mock +import shutil +from unittest.mock import patch, PropertyMock, Mock + +from nmdc_schema.nmdc import Database +from nmdc_automation.workflow_automation.watch_nmdc import ( + Watcher, + FileHandler, + JobManager, + RuntimeApiHandler, +) +from nmdc_automation.workflow_automation.wfutils import WorkflowJob +from tests.fixtures import db_utils + + +# FileHandler init tests +def test_file_handler_init_from_state_file(site_config, initial_state_file, tmp_path): + copy_state_file = tmp_path / "copy_state.json" + shutil.copy(initial_state_file, copy_state_file) + fh = FileHandler(site_config, initial_state_file) + assert fh + assert fh.state_file + assert isinstance(fh.state_file, PosixPath) + assert fh.state_file.exists() + assert fh.state_file.is_file() + 
# delete state file + fh.state_file = None + assert not fh.state_file + + # test setter + fh.state_file = initial_state_file + assert fh.state_file + assert fh.state_file.exists() + assert fh.state_file.is_file() + + # unlink state file + fh.state_file.unlink() + assert not fh.state_file.exists() + fh.state_file = copy_state_file + assert fh.state_file.exists() + assert fh.state_file.is_file() + + +def test_file_handler_init_from_config_agent_state(site_config, initial_state_file, tmp_path): + with patch("nmdc_automation.config.siteconfig.SiteConfig.agent_state", new_callable=PropertyMock) as mock_agent_state: + mock_agent_state.return_value = initial_state_file + fh = FileHandler(site_config) + assert fh + assert fh.state_file + assert fh.state_file.exists() + + +def test_file_handler_init_default_state(site_config): + # sanity check + assert site_config.agent_state is None + fh = FileHandler(site_config) + assert fh + assert fh.state_file + assert fh.state_file.exists() + # delete everything in the state file leaving an empty file + with open(fh.state_file, "w") as f: + f.write("") + assert fh.state_file.stat().st_size == 0 + + # create new FileHandler - should create new state file + fh2 = FileHandler(site_config) + assert fh2 + assert fh2.state_file + assert fh2.state_file.exists() + + +def test_file_handler_read_state(site_config, initial_state_file): + fh = FileHandler(site_config, initial_state_file) + state = fh.read_state() + assert state + assert isinstance(state, dict) + assert state.get("jobs") + assert isinstance(state.get("jobs"), list) + assert len(state.get("jobs")) == 1 + + +def test_file_handler_write_state(site_config, initial_state_file, fixtures_dir): + fh = FileHandler(site_config, initial_state_file) + state = fh.read_state() + assert state + # add new job + new_job = json.load(open(fixtures_dir / "new_state_job.json")) + assert new_job + state["jobs"].append(new_job) + fh.write_state(state) + # read state + new_state = fh.read_state() + 
assert new_state + assert isinstance(new_state, dict) + assert new_state.get("jobs") + assert isinstance(new_state.get("jobs"), list) + assert len(new_state.get("jobs")) == 2 + # reset state + fh.write_state(state) + + +def test_file_handler_get_output_path(site_config, initial_state_file, fixtures_dir): + # Arrange + was_informed_by = "nmdc:1234" + workflow_execution_id = "nmdc:56789" + mock_job = Mock() + mock_job.was_informed_by = was_informed_by + mock_job.workflow_execution_id = workflow_execution_id + + expected_output_path = site_config.data_dir / Path(was_informed_by) / Path(workflow_execution_id) + + fh = FileHandler(site_config, initial_state_file) + + # Act + output_path = fh.get_output_path(mock_job) + + # Assert + assert output_path + assert isinstance(output_path, PosixPath) + assert output_path == expected_output_path + + +def test_file_handler_write_metadata_if_not_exists(site_config, initial_state_file, fixtures_dir, tmp_path): + # Arrange + was_informed_by = "nmdc:1234" + workflow_execution_id = "nmdc:56789" + job_metadata = {"id": "xyz-123-456", "status": "Succeeded"} + mock_job = Mock() + mock_job.was_informed_by = was_informed_by + mock_job.workflow_execution_id = workflow_execution_id + mock_job.job.metadata = job_metadata + + + # patch config.data_dir + with patch("nmdc_automation.config.siteconfig.SiteConfig.data_dir", new_callable=PropertyMock) as mock_data_dir: + mock_data_dir.return_value = tmp_path + fh = FileHandler(site_config, initial_state_file) + + # Act + metadata_path = fh.write_metadata_if_not_exists(mock_job) + + # Assert + assert metadata_path + assert metadata_path.exists() + assert metadata_path.is_file() + + +# JobManager tests +def test_job_manager_init(site_config, initial_state_file): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + assert jm + assert jm.file_handler + assert jm.file_handler.state_file -@fixture(autouse=True) -def cleanup(): - tdir = 
os.path.dirname(__file__) - dd = os.path.join(tdir, "..", "test_data", "nmdc:mga0xxx") - if os.path.exists(dd): - shutil.rmtree(dd) - omics_id = "nmdc:omprc-11-nhy4pz43/" - if os.path.exists(f"/tmp/{omics_id}"): - shutil.rmtree(f"/tmp/{omics_id}") - if os.path.exists("/tmp/agent.state"): - os.unlink("/tmp/agent.state") +def test_job_manager_restore_from_state(site_config, initial_state_file): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh, init_cache=False) + # Act + jm.restore_from_state() + # Assert + assert jm.job_cache + assert isinstance(jm.job_cache, list) + assert len(jm.job_cache) == 1 + assert isinstance(jm.job_cache[0], WorkflowJob) + + +def test_job_manager_job_checkpoint(site_config, initial_state_file): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + # Act + data = jm.job_checkpoint() + # Assert + assert data + assert isinstance(data, dict) + assert data.get("jobs") + assert isinstance(data.get("jobs"), list) + assert len(data.get("jobs")) == 1 + + +def test_job_manager_save_checkpoint(site_config, initial_state_file): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + # Act + jm.save_checkpoint() + # Assert + assert fh.state_file.exists() + assert fh.state_file.is_file() + + # cleanup + fh.state_file.unlink() + +def test_job_manager_find_job_by_opid(site_config, initial_state_file): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + # Act + job = jm.find_job_by_opid("nmdc:test-opid") + # Assert + assert job + assert isinstance(job, WorkflowJob) + assert job.opid == "nmdc:test-opid" + assert not job.done + + +def test_job_manager_prepare_and_cache_new_job(site_config, initial_state_file, fixtures_dir): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + new_job_state = json.load(open(fixtures_dir / 
"new_state_job.json")) + assert new_job_state + new_job = WorkflowJob(site_config, new_job_state) + # Act + opid = "nmdc:test-opid-2" + job = jm.prepare_and_cache_new_job(new_job, opid) + # Assert + assert job + assert isinstance(job, WorkflowJob) + assert job.opid == opid + assert not job.done + # cleanup + jm.job_cache = [] + + +def test_job_manager_prepare_and_cache_new_job_force(site_config, initial_state_file, fixtures_dir): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + #already has an opid + new_job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + assert new_job_state + new_job = WorkflowJob(site_config, new_job_state) + # Act + opid = "nmdc:test-opid-1" + job = jm.prepare_and_cache_new_job(new_job, opid, force=True) + # Assert + assert job + assert isinstance(job, WorkflowJob) + assert job.opid == opid + assert not job.done + assert job in jm.job_cache + # resubmit the job without force it will return None + job2 = jm.prepare_and_cache_new_job(job, opid) + assert not job2 + # try again with force + job2 = jm.prepare_and_cache_new_job(job, opid, force=True) + assert job2 + assert isinstance(job2, WorkflowJob) + assert job2.opid == opid + + + + +def test_job_manager_get_finished_jobs(site_config, initial_state_file, fixtures_dir): + # Arrange - initial state has 1 failure and is not done + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + + # Add a job to the cache - mags is done and successful + new_job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + assert new_job_state + new_job = WorkflowJob(site_config, new_job_state) + jm.job_cache.append(new_job) + # sanity check + assert len(jm.job_cache) == 2 + + # add a failed job + failed_job_state = json.load(open(fixtures_dir / "failed_job_state.json")) + assert failed_job_state + failed_job = WorkflowJob(site_config, failed_job_state) + assert failed_job.job_status == "Failed" + 
jm.job_cache.append(failed_job) + # sanity check + assert len(jm.job_cache) == 3 + + # Act + successful_jobs, failed_jobs = jm.get_finished_jobs() + # Assert + assert successful_jobs + assert failed_jobs + # cleanup + jm.job_cache = [] + + +def test_job_manager_process_successful_job(site_config, initial_state_file, fixtures_dir): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + new_job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + assert new_job_state + new_job = WorkflowJob(site_config, new_job_state) + jm.job_cache.append(new_job) + # Act + db = jm.process_successful_job(new_job) + # Assert + assert db + assert isinstance(db, Database) + assert new_job.done + assert new_job.job_status == "Succeeded" + # cleanup + jm.job_cache = [] + + +def test_job_manager_process_failed_job(site_config, initial_state_file, fixtures_dir): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + failed_job_state = json.load(open(fixtures_dir / "failed_job_state.json")) + assert failed_job_state + failed_job = WorkflowJob(site_config, failed_job_state) + jm.job_cache.append(failed_job) + # Act + jm.process_failed_job(failed_job) + # Assert + assert failed_job.done @fixture -def mock_nmdc_api(requests_mock, test_data_dir): +def mock_runtime_api_handler(site_config, mock_api): + pass - rqcf = test_data_dir / "rqc_response2.json" - rqc = json.load(open(rqcf)) - resp = {"resources": [rqc]} - requests_mock.get("http://localhost/jobs", json=resp) - requests_mock.post("http://localhost/workflows/activities", json={}) - requests_mock.patch("http://localhost/operations/nmdc:1234", json={}) - requests_mock.get("http://localhost/operations/nmdc:1234", - json={'metadata': {}}) +@mock.patch("nmdc_automation.workflow_automation.wfutils.CromwellRunner.submit_job") +def test_claim_jobs(mock_submit, site_config_file, site_config, fixtures_dir): + # Arrange + 
mock_submit.return_value = {"id": "nmdc:1234", "detail": {"id": "nmdc:1234"}} + with patch( + "nmdc_automation.workflow_automation.watch_nmdc.RuntimeApiHandler.claim_job" + ) as mock_claim_job, requests_mock.Mocker() as m: + mock_claim_job.return_value = {"id": "nmdc:1234", "detail": {"id": "nmdc:1234"}} + job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + # remove the opid + job_state.pop("opid") + unclaimed_wfj = WorkflowJob(site_config, workflow_state=job_state) + # mock the status URL response + status_url = f"http://localhost:8088/api/workflows/v1/{unclaimed_wfj.job.job_id}/status" + m.get(status_url, json={"id": "nmdc:1234", "status": "Succeeded"}) -@fixture(autouse=True) -def mock_cromwell(requests_mock, test_data_dir): - requests_mock.real_http = True - data = {"id": "1234"} - cromwell_url = "http://localhost:8088/api/workflows/v1" - requests_mock.post(cromwell_url, json=data) - afile_path = test_data_dir / "afile" - bfile_path = test_data_dir / "bfile" - metadata = {'outputs': { - "nmdc_rqcfilter.filtered_final": str(afile_path), - "nmdc_rqcfilter.filtered_stats_final": str(bfile_path), - "nmdc_rqcfilter.stats": { - "input_read_count": 11431762, - "input_read_bases": 1726196062, - "output_read_bases": 1244017053, - "output_read_count": 8312566 - }, - }} - requests_mock.get(f"{cromwell_url}/1234/metadata", json=metadata) - data = {"status": "Succeeded"} - requests_mock.get(f"{cromwell_url}/1234/status", json=data) - - -def test_watcher(site_config): - w = Watcher(site_config) - w.restore() - w.job_checkpoint() - w.restore() - - -def test_claim_jobs(requests_mock, site_config, mock_nmdc_api): - requests_mock.real_http = True - w = Watcher(site_config) - job_id = "nmdc:b7eb8cda-a6aa-11ed-b1cf-acde48001122" - resp = { - 'id': 'nmdc:1234', - 'detail': {'id': 'nmdc:1234'} - } - requests_mock.post(f"http://localhost/jobs/{job_id}:claim", json=resp) - w.claim_jobs() - w.cycle() - resp = w.find_job_by_opid("nmdc:1234") - assert resp - - -def 
test_reclaim_job(requests_mock, site_config, mock_nmdc_api): + w = Watcher(site_config_file) + w.claim_jobs(unclaimed_jobs=[unclaimed_wfj]) + + # Assert + assert unclaimed_wfj.job_status + + +def test_runtime_manager_get_unclaimed_jobs(site_config, initial_state_file, fixtures_dir): + # Arrange + rt = RuntimeApiHandler(site_config) + # Act + unclaimed_jobs = rt.get_unclaimed_jobs(rt.config.allowed_workflows) + # Assert + assert unclaimed_jobs + + +def test_reclaim_job(requests_mock, site_config_file, mock_api): requests_mock.real_http = True - w = Watcher(site_config) + w = Watcher(site_config_file) job_id = "nmdc:b7eb8cda-a6aa-11ed-b1cf-acde48001122" - resp = { - 'id': 'nmdc:1234', - 'detail': {'id': 'nmdc:1234'} - } - requests_mock.post(f"http://localhost/jobs/{job_id}:claim", json=resp, - status_code=409) - w.claim_jobs() - resp = w.find_job_by_opid("nmdc:1234") - assert resp \ No newline at end of file + resp = {'id': 'nmdc:1234', 'detail': {'id': 'nmdc:1234'}} + requests_mock.post( + f"http://localhost/jobs/{job_id}:claim", json=resp, status_code=409 + ) # w.claim_jobs() # resp = w.job_manager.find_job_by_opid("nmdc:1234") # assert resp + + +def test_watcher_restore_from_checkpoint(site_config_file, fixtures_dir): + state_file = fixtures_dir / "mags_workflow_state.json" diff --git a/tests/test_wfutils.py b/tests/test_wfutils.py index eac03670..fde4942c 100644 --- a/tests/test_wfutils.py +++ b/tests/test_wfutils.py @@ -1,47 +1,306 @@ -from nmdc_automation.workflow_automation.wfutils import WorkflowJob as job +from nmdc_automation.workflow_automation.wfutils import ( + CromwellRunner, + WorkflowJob, + WorkflowStateManager, + _json_tmp, +) +from nmdc_automation.workflow_automation.models import DataObject, workflow_process_factory +from nmdc_schema.nmdc import MagsAnalysis, EukEval +import io import json +import os +import pytest +import requests +import tempfile +from unittest import mock -def test_job(job_config, requests_mock, test_data_dir): - 
requests_mock.real_http = True - data = {"id": "123"} - requests_mock.post("http://localhost:8088/api/workflows/v1", json=data) - rqcf = test_data_dir / "rqc_response.json" - rqc = json.load(open(rqcf)) - ajob = job(job_config, workflow_config=rqc['config']) - ajob.debug = True - ajob.dryrun = False - assert ajob.get_state() - ajob.cromwell_submit() - last = requests_mock.request_history[-1] - assert last.method == "POST" - assert last.url == "http://localhost:8088/api/workflows/v1" - - -def test_log(job_config): - ajob = job(job_config, workflow_config={}) - # ajob = job("example", "jobid", conf={}) - ajob.debug = True - ajob.json_log({"a": "b"}, title="Test") - - -def test_check_meta(job_config, requests_mock): - url = "http://localhost:8088/api/workflows/v1/1234/status" - requests_mock.get(url, json={"status": "Submitted"}) - url = "http://localhost:8088/api/workflows/v1/1234/metadata" - requests_mock.get(url, json={"status": "Submitted"}) - ajob = job(job_config, workflow_config={}) - ajob.jobid = "1234" - resp = ajob.check_status() - assert resp - resp = ajob.get_metadata() - assert resp - - -def test_set_state(job_config): - ajob = job(job_config, workflow_config={}) - state = ajob.get_state() - assert state - bjob = job(job_config, state=state) - assert bjob.activity_id == state['activity_id'] +def test_workflow_job(site_config, fixtures_dir): + workflow_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + job_metadata = json.load(open(fixtures_dir / "mags_job_metadata.json")) + + job = WorkflowJob(site_config, workflow_state, job_metadata) + assert job + assert job.workflow_execution_id == workflow_state['activity_id'] + + +def test_cromwell_job_runner(site_config, fixtures_dir): + # load cromwell metadata + job_metadata = json.load(open(fixtures_dir / "mags_job_metadata.json")) + job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + state_manager = WorkflowStateManager(job_state) + job_runner = CromwellRunner(site_config, 
state_manager, job_metadata) + assert job_runner + + +def test_cromwell_job_runner_get_job_status(site_config, fixtures_dir, mock_cromwell_api): + # load cromwell metadata + job_metadata = json.load(open(fixtures_dir / "mags_job_metadata.json")) + job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + # successful job from the test fixtures + job_state['cromwell_jobid'] = "cromwell-job-id-12345" + job_metadata['id'] = "cromwell-job-id-12345" + + state_manager = WorkflowStateManager(job_state) + job_runner = CromwellRunner(site_config, state_manager, job_metadata) + status = job_runner.get_job_status() + assert status + assert status == "Succeeded" + + # failed job from the test fixtures + job_state['cromwell_jobid'] = "cromwell-job-id-54321" + job_metadata['id'] = "cromwell-job-id-54321" + state_manager = WorkflowStateManager(job_state) + job_runner = CromwellRunner(site_config, state_manager, job_metadata) + status = job_runner.get_job_status() + assert status + assert status == "Failed" + + +def test_cromwell_job_runner_get_job_metadata(site_config, fixtures_dir, mock_cromwell_api): + # load cromwell metadata + job_metadata = json.load(open(fixtures_dir / "mags_job_metadata.json")) + job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + # successful job from the test fixtures + job_state['cromwell_jobid'] = "cromwell-job-id-12345" + job_metadata['id'] = "cromwell-job-id-12345" + + state_manager = WorkflowStateManager(job_state) + job_runner = CromwellRunner(site_config, state_manager, job_metadata) + metadata = job_runner.get_job_metadata() + assert metadata + assert metadata['id'] == "cromwell-job-id-12345" + # check that the metadata is cached + assert job_runner.metadata == metadata + + + + + +def test_workflow_job_as_workflow_execution_dict(site_config, fixtures_dir): + workflow_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + job_metadata = json.load(open(fixtures_dir / "mags_job_metadata.json")) + + 
wfj = WorkflowJob(site_config, workflow_state, job_metadata) + + wfe_dict = wfj.as_workflow_execution_dict + assert wfe_dict + + +def test_workflow_state_manager(fixtures_dir): + mags_job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + + state = WorkflowStateManager(mags_job_state) + assert state.workflow_execution_id == mags_job_state['activity_id'] + assert state.config == mags_job_state['conf'] + assert state.execution_template == mags_job_state['conf']['activity'] + assert state.was_informed_by == mags_job_state['conf']['was_informed_by'] + + +# Mock response content +MOCK_FILE_CONTENT = b"Test file content" +MOCK_CHUNK_SIZE = 1024 # Assume the CHUNK_SIZE is 1024 in your class + +@mock.patch('requests.get') +def test_workflow_manager_fetch_release_file_success(mock_get, fixtures_dir): + mock_response = mock.Mock() + mock_response.iter_content = mock.Mock( + return_value=[MOCK_FILE_CONTENT[i:i + MOCK_CHUNK_SIZE] + for i in range(0, len(MOCK_FILE_CONTENT), MOCK_CHUNK_SIZE)] + ) + mock_response.status_code = 200 + mock_get.return_value = mock_response + + # Test the function + initial_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + state = WorkflowStateManager(initial_state) + + file_path = state.fetch_release_file("test_file", ".txt") + print(f"File path: {file_path}") + + assert file_path + assert os.path.exists(file_path), f"File not found at {file_path}" + with open(file_path, 'rb') as f: + assert f.read() == MOCK_FILE_CONTENT + + os.remove(file_path) + + +@mock.patch('requests.get') +def test_workflow_manager_fetch_release_file_failed_download(mock_get, fixtures_dir): + # Mock a failed request + mock_response = mock.Mock() + mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("404 Client Error: Not Found") + mock_get.return_value = mock_response + + # Test the function + initial_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + state = WorkflowStateManager(initial_state) + + 
with pytest.raises(requests.exceptions.HTTPError): + state.fetch_release_file("test_file", ".txt") + + # Check that the file was not created + assert not os.path.exists("test_file.txt") + + +@mock.patch('requests.get') +def test_workflow_manager_fetch_release_file_failed_write(mock_get, fixtures_dir): + # Mock the response + mock_response = mock.Mock() + mock_response.iter_content = mock.Mock( + return_value=[MOCK_FILE_CONTENT[i:i + MOCK_CHUNK_SIZE] + for i in range(0, len(MOCK_FILE_CONTENT), MOCK_CHUNK_SIZE)] + ) + mock_response.status_code = 200 + mock_get.return_value = mock_response + + # Patch the tempfile.mkstemp function to raise an exception during file creation + with mock.patch('tempfile.NamedTemporaryFile', side_effect=OSError("Failed to create file")): + # Test the function + initial_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + state = WorkflowStateManager(initial_state) + + with pytest.raises(OSError): + state.fetch_release_file("test_file", ".txt") + + # Check that the file was not created + assert not os.path.exists("test_file.txt") + + +def test_cromwell_runner_setup_inputs_and_labels(site_config, fixtures_dir): + job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + workflow = WorkflowStateManager(job_state) + runner = CromwellRunner(site_config, workflow) + inputs = runner._generate_workflow_inputs() + assert inputs + # we expect the inputs to be a key-value dict with URLs as values + for key, value in inputs.items(): + if key.endswith("file"): + assert value.startswith("http") + + labels = runner._generate_workflow_labels() + assert labels + assert labels['submitter'] == "nmdcda" + assert labels['git_repo'].startswith("https://github.com/microbiomedata") + assert labels['pipeline'] == labels['wdl'] + + +@mock.patch("nmdc_automation.workflow_automation.wfutils.WorkflowStateManager.fetch_release_file") +def test_cromwell_runner_generate_submission_files( mock_fetch_release_file, site_config, fixtures_dir): 
+ mock_fetch_release_file.side_effect = [ + '/tmp/test_workflow.wdl', + '/tmp/test_bundle.zip', + ] + job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + assert job_state + workflow = WorkflowStateManager(job_state) + + # Now mock 'open' for the workflow submission files + with mock.patch("builtins.open", new_callable=mock.mock_open) as mock_open: + mock_open.side_effect = [ + io.BytesIO(b"mock wdl file content"), # workflowSource file + io.BytesIO(b"mock bundle file content"), # workflowDependencies file + io.BytesIO(b"mock workflow inputs"), # workflowInputs file + io.BytesIO(b"mock labels") # labels file + ] + runner = CromwellRunner(site_config, workflow) + submission_files = runner.generate_submission_files() + assert submission_files + assert "workflowSource" in submission_files + assert "workflowDependencies" in submission_files + assert "workflowInputs" in submission_files + assert "labels" in submission_files + + # check that the files were written + assert mock_open.call_count == 4 + mock_open.assert_any_call("/tmp/test_workflow.wdl", 'rb') + mock_open.assert_any_call("/tmp/test_bundle.zip", 'rb') + + +@mock.patch("nmdc_automation.workflow_automation.wfutils.WorkflowStateManager.fetch_release_file") +@mock.patch("nmdc_automation.workflow_automation.wfutils.CromwellRunner._cleanup_files") +def test_cromwell_runner_generate_submission_files_exception(mock_cleanup_files, mock_fetch_release_file, + site_config, fixtures_dir): + # Mock file fetching + mock_fetch_release_file.side_effect = [ + '/tmp/test_workflow.wdl', # First file fetch is successful + '/tmp/test_bundle.zip', # Second file fetch is successful + ] + job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + assert job_state + workflow = WorkflowStateManager(job_state) + + # Now mock 'open' for the workflow submission files + with mock.patch("builtins.open", new_callable=mock.mock_open) as mock_open: + mock_open.side_effect = [ + io.BytesIO(b"mock wdl file 
content"), # workflowSource file + io.BytesIO(b"mock bundle file content"), # workflowDependencies file + OSError("Failed to open file"), # workflowInputs file + io.BytesIO(b"mock labels") # labels file + ] + runner = CromwellRunner(site_config, workflow) + with pytest.raises(OSError): + runner.generate_submission_files() + # Check that the cleanup function was called + mock_cleanup_files.assert_called_once() + + +@mock.patch("nmdc_automation.workflow_automation.wfutils.CromwellRunner.generate_submission_files") +def test_cromwell_job_runner_submit_job_new_job(mock_generate_submission_files, site_config, fixtures_dir, mock_cromwell_api): + mock_generate_submission_files.return_value = { + "workflowSource": "workflowSource", + "workflowDependencies": "workflowDependencies", + "workflowInputs": "workflowInputs", + "labels": "labels" + } + # A new workflow job that has not been submitted - it has a workflow state + # but no job metadata + wf_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + wf_state['last_status'] = None # simulate a job that has not been submitted + wf_state['cromwell_jobid'] = None # simulate a job that has not been submitted + wf_state['done'] = False # simulate a job that has not been submitted + + wf_state_manager = WorkflowStateManager(wf_state) + job_runner = CromwellRunner(site_config, wf_state_manager) + job_runner.submit_job() + + +def test_workflow_job_data_objects_and_execution_record_mags(site_config, fixtures_dir, tmp_path): + job_metadata = json.load(open(fixtures_dir / "mags_job_metadata.json")) + workflow_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + job = WorkflowJob(site_config, workflow_state, job_metadata) + data_objects = job.make_data_objects(output_dir=tmp_path) + assert data_objects + for data_object in data_objects: + assert isinstance(data_object, DataObject) + wfe_dict = job.make_workflow_execution_record(data_objects) + wfe = workflow_process_factory(wfe_dict) + assert 
isinstance(wfe, MagsAnalysis) + # attributes from final_stats_json + assert wfe.mags_list + assert isinstance(wfe.mags_list, list) + # check for eukaryotic evaluation in each mag + for mag in wfe.mags_list: + assert mag.eukaryotic_evaluation + assert isinstance(mag.eukaryotic_evaluation, EukEval) + assert mag.eukaryotic_evaluation.completeness + assert mag.eukaryotic_evaluation.contamination + assert mag.eukaryotic_evaluation.ncbi_lineage + assert mag.eukaryotic_evaluation.ncbi_lineage + # check that the other final_stats props are there + assert isinstance(wfe.input_contig_num, int) + assert isinstance(wfe.too_short_contig_num, int) + assert isinstance(wfe.unbinned_contig_num, int) + assert isinstance(wfe.binned_contig_num, int) + + + + +def test_workflow_job_from_database_job_record(site_config, fixtures_dir): + job_rec = json.load(open(fixtures_dir / "nmdc_api/unsubmitted_job.json")) + assert job_rec + job = WorkflowJob(site_config, job_rec) + assert job + assert job.workflow.nmdc_jobid == job_rec['id'] diff --git a/tests/test_activities.py b/tests/test_workflow_process.py similarity index 50% rename from tests/test_activities.py rename to tests/test_workflow_process.py index 5ba510bb..f8813353 100644 --- a/tests/test_activities.py +++ b/tests/test_workflow_process.py @@ -1,22 +1,17 @@ from pytest import mark -from nmdc_automation.workflow_automation.activities import ( - load_activities, - get_required_data_objects_map -) -from nmdc_automation.workflow_automation.workflows import load_workflows -from tests.fixtures.db_utils import get_updated_fixture, load_fixture, reset_db +from nmdc_automation.workflow_automation.workflow_process import ( + get_required_data_objects_map, get_current_workflow_process_nodes, load_workflow_process_nodes) +from nmdc_automation.workflow_automation.workflows import load_workflow_configs +from tests.fixtures.db_utils import load_fixture, reset_db @mark.parametrize( - "workflow_file", [ - "workflows.yaml", - "workflows-mt.yaml" - ] - 
) -def test_activies(test_db, workflow_file, workflows_config_dir): + "workflow_file", ["workflows.yaml", "workflows-mt.yaml"] +) +def test_load_workflow_process_nodes(test_db, workflow_file, workflows_config_dir): """ - Test basic job creation + Test """ metatranscriptome = False if workflow_file == "workflows-mt.yaml": @@ -24,36 +19,35 @@ def test_activies(test_db, workflow_file, workflows_config_dir): reset_db(test_db) load_fixture(test_db, "data_object_set.json") - load_fixture(test_db, "omics_processing_set.json") - load_fixture(test_db, "read_qc_analysis_activity_set.json") + load_fixture(test_db, "data_generation_set.json") + load_fixture(test_db, "read_qc_analysis.json", "workflow_execution_set") - wfs = load_workflows(workflows_config_dir / workflow_file) - for wf in wfs: - if not wf.type: - continue - # TODO: these tests are very sensitive to the exact content of the fixture files - need to be more robust - updated_fixtures = get_updated_fixture(wf) - if updated_fixtures: - test_db[wf.collection].delete_many({}) - test_db[wf.collection].insert_many(updated_fixtures) - - acts = load_activities(test_db, wfs) + wfs = load_workflow_configs(workflows_config_dir / workflow_file) + + # these are called by load_activities + data_objs_by_id = get_required_data_objects_map(test_db, wfs) + wf_execs = get_current_workflow_process_nodes(test_db, wfs, data_objs_by_id) + assert wf_execs + assert len(wf_execs) == 2 + + acts = load_workflow_process_nodes(test_db, wfs) # sanity check assert acts + assert len(acts) == 2 # Omics and RQC share data_object_type for metagenome and metatranscriptome # they can be distinguished by analyte category so we expect 1 of each # for metagenome and metatranscriptome - omics = acts[0] - assert omics.type == "nmdc:OmicsProcessing" - assert len(omics.children) == 1 - assert omics.children[0].type.lower() == "nmdc:ReadQCAnalysisActivity".lower() - rqc = acts[1] - assert rqc.type == "nmdc:ReadQcAnalysisActivity" + data_gen = [act for act in 
acts if act.type == "nmdc:NucleotideSequencing"][0] + assert data_gen + assert data_gen.children + assert len(data_gen.children) == 1 + assert data_gen.children[0].type == "nmdc:ReadQcAnalysis" + @mark.parametrize( "workflow_file", ["workflows.yaml", "workflows-mt.yaml"] - ) +) def test_load_workflows(workflows_config_dir, workflow_file): """ Test Workflow object creation @@ -65,21 +59,15 @@ def test_load_workflows(workflows_config_dir, workflow_file): shared_wf_names = ["Sequencing Noninterleaved", "Sequencing Interleaved"] if metatranscriptome: exp_num_wfs = 9 - exp_wf_names = ["Metatranscriptome Reads QC", - "Metatranscriptome Reads QC Interleave", - "Metatranscriptome Assembly", - "Metatranscriptome Annotation", - "Expression Analysis Antisense", - "Expression Analysis Sense", - "Expression Analysis Nonstranded", - ] + exp_wf_names = ["Metatranscriptome Reads QC", "Metatranscriptome Reads QC Interleave", + "Metatranscriptome Assembly", "Metatranscriptome Annotation", "Expression Analysis Antisense", + "Expression Analysis Sense", "Expression Analysis Nonstranded", ] else: exp_num_wfs = 8 - exp_wf_names = ["Reads QC", "Reads QC Interleave", "Metagenome Assembly", - "Metagenome Annotation", "MAGs", "Readbased Analysis", ] - + exp_wf_names = ["Reads QC", "Reads QC Interleave", "Metagenome Assembly", "Metagenome Annotation", "MAGs", + "Readbased Analysis", ] - wfs = load_workflows(workflows_config_dir / workflow_file) + wfs = load_workflow_configs(workflows_config_dir / workflow_file) assert wfs wfm = {} assert len(wfs) == len(exp_wf_names) + len(shared_wf_names) @@ -96,30 +84,21 @@ def test_load_workflows(workflows_config_dir, workflow_file): assert wf.version is not None assert wf.analyte_category is not None + @mark.parametrize( - "workflow_file", [ - "workflows.yaml", - "workflows-mt.yaml" - ] - ) + "workflow_file", ["workflows.yaml", "workflows-mt.yaml"] +) def test_get_required_data_objects_by_id(test_db, workflows_config_dir, workflow_file): """ Test 
get_required_data_objects_by_id """ - metatranscriptome = False - if workflow_file == "workflows-mt.yaml": - metatranscriptome = True - # non-comprehensive list of expected data object types - exp_do_types = [ - "Metagenome Raw Read 1", "Metagenome Raw Read 2", "Filtered Sequencing Reads" - ] + exp_do_types = ["Metagenome Raw Read 1", "Metagenome Raw Read 2", "Filtered Sequencing Reads"] # TODO: add workflow specific data objects - reset_db(test_db) load_fixture(test_db, "data_object_set.json") - wfs = load_workflows(workflows_config_dir / workflow_file) + wfs = load_workflow_configs(workflows_config_dir / workflow_file) required_data_object_map = get_required_data_objects_map(test_db, wfs) assert required_data_object_map @@ -130,4 +109,3 @@ def test_get_required_data_objects_by_id(test_db, workflows_config_dir, workflow # check that the expected data object types are present for do_type in exp_do_types: assert do_type in do_types - diff --git a/tests/workflows_test.yaml b/tests/workflows_test.yaml deleted file mode 100644 index 543f88a4..00000000 --- a/tests/workflows_test.yaml +++ /dev/null @@ -1,445 +0,0 @@ -Workflows: - - Name: Sequencing Noninterleaved - Collection: omics_processing_set - Filter Output Objects: - - Metagenome Raw Read 1 - - Metagenome Raw Read 2 - - - Name: Sequencing Interleaved - Collection: omics_processing_set - Filter Output Objects: - - Metagenome Raw Reads - - - Name: Sequencing - Type: nmdc:MetagenomeSequencingActivity - Enabled: False - Git_repo: https://github.com/microbiomedata/RawSequencingData - Version: v1.0.0 - Collection: metagenome_sequencing_activity_set - Outputs: [] - - - Name: Reads QC - Type: nmdc:ReadQcAnalysisActivity - Enabled: True - Git_repo: https://github.com/microbiomedata/ReadsQC - Version: v1.0.7 - WDL: rqcfilter.wdl - Collection: read_qc_analysis_activity_set - Filter Input Objects: - - Metagenome Raw Reads - Predecessors: - - Sequencing - - Sequencing Interleaved - Input_prefix: nmdc_rqcfilter - Inputs: - 
input_files: do:Metagenome Raw Reads - proj: "{activity_id}" - Activity: - name: "Read QC Activity for {id}" - input_read_bases: "{outputs.stats.input_read_bases}" - input_read_count: "{outputs.stats.input_read_count}" - output_read_bases: "{outputs.stats.output_read_bases}" - output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity - Outputs: - - output: filtered_final - name: Reads QC result fastq (clean data) - suffix: "_filtered.fastq.gz" - data_object_type: Filtered Sequencing Reads - description: "Reads QC for {id}" - - output: filtered_stats_final - name: Reads QC summary statistics - suffix: "_filterStats.txt" - data_object_type: QC Statistics - description: "Reads QC summary for {id}" - - output: rqc_info - name: File containing read filtering information - suffix: "_readsQC.info" - data_object_type: Read Filtering Info File - description: "Read filtering info for {id}" - - - Name: Reads QC Interleave - Type: nmdc:ReadQcAnalysisActivity - Enabled: True - Git_repo: https://github.com/microbiomedata/ReadsQC - Version: v1.0.7 - Collection: read_qc_analysis_activity_set - WDL: interleave_rqcfilter.wdl - Input_prefix: nmdc_rqcfilter - Inputs: - proj: "{activity_id}" - input_fastq1: do:Metagenome Raw Read 1 - input_fastq2: do:Metagenome Raw Read 2 - Filter Input Objects: - - Metagenome Raw Read 1 - - Metagenome Raw Read 2 - Predecessors: - - Sequencing Noninterleaved - Activity: - name: "Read QC Activity for {id}" - input_read_bases: "{outputs.stats.input_read_bases}" - input_read_count: "{outputs.stats.input_read_count}" - output_read_bases: "{outputs.stats.output_read_bases}" - output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity - Outputs: - - output: filtered_final - name: Reads QC result fastq (clean data) - suffix: "_filtered.fastq.gz" - data_object_type: Filtered Sequencing Reads - description: "Reads QC for {id}" - - output: filtered_stats_final - name: Reads QC summary statistics - 
suffix: "_filterStats.txt" - data_object_type: QC Statistics - description: "Reads QC summary for {id}" - - output: rqc_info - name: File containing read filtering information - suffix: "_readsQC.info" - data_object_type: Read Filtering Info File - description: "Read filtering info for {id}" - - - Name: Metagenome Assembly - Type: nmdc:MetagenomeAssembly - Enabled: True - Git_repo: https://github.com/microbiomedata/metaAssembly - Version: v1.0.3 - WDL: jgi_assembly.wdl - Collection: metagenome_assembly_set - Predecessors: - - Reads QC - - Reads QC Interleave - Input_prefix: jgi_metaASM - Inputs: - input_file: do:Filtered Sequencing Reads - rename_contig_prefix: "{activity_id}" - proj: "{activity_id}" - Activity: - name: "Metagenome Assembly Activity for {id}" - type: nmdc:MetagenomeAssembly - asm_score: "{outputs.stats.asm_score}" - contig_bp: "{outputs.stats.contig_bp}" - contigs: "{outputs.stats.contigs}" - ctg_l50: "{outputs.stats.ctg_l50}" - ctg_l90: "{outputs.stats.ctg_l90}" - ctg_logsum: "{outputs.stats.ctg_logsum}" - ctg_max: "{outputs.stats.ctg_max}" - ctg_n50: "{outputs.stats.ctg_n50}" - ctg_n90: "{outputs.stats.ctg_n90}" - ctg_powsum: "{outputs.stats.ctg_powsum}" - gap_pct: "{outputs.stats.gap_pct}" - gc_avg: "{outputs.stats.gc_avg}" - gc_std: "{outputs.stats.gc_std}" - scaf_bp: "{outputs.stats.scaf_bp}" - scaf_l50: "{outputs.stats.scaf_l50}" - scaf_l90: "{outputs.stats.scaf_l90}" - scaf_l_gt50k: "{outputs.stats.scaf_l_gt50k}" - scaf_logsum: "{outputs.stats.scaf_logsum}" - scaf_max: "{outputs.stats.scaf_max}" - scaf_n50: "{outputs.stats.scaf_n50}" - scaf_n90: "{outputs.stats.scaf_n90}" - scaf_n_gt50k: "{outputs.stats.scaf_n_gt50k}" - scaf_pct_gt50k: "{outputs.stats.scaf_pct_gt50k}" - scaf_powsum: "{outputs.stats.scaf_powsum}" - scaffolds: "{outputs.stats.scaffolds}" - Outputs: - - output: contig - name: Final assembly contigs fasta - suffix: "_contigs.fna" - data_object_type: Assembly Contigs - description: "Assembly contigs for {id}" - - output: scaffold 
- name: Final assembly scaffolds fasta - suffix: "_scaffolds.fna" - data_object_type: Assembly Scaffolds - description: "Assembly scaffolds for {id}" - - output: covstats - name: Assembled contigs coverage information - suffix: "_covstats.txt" - data_object_type: Assembly Coverage Stats - description: "Coverage Stats for {id}" - - output: agp - name: An AGP format file that describes the assembly - suffix: "_assembly.agp" - data_object_type: Assembly AGP - description: "AGP for {id}" - - output: bam - name: Sorted bam file of reads mapping back to the final assembly - suffix: "_pairedMapped_sorted.bam" - data_object_type: Assembly Coverage BAM - description: "Sorted Bam for {id}" - - output: asminfo - name: File containing assembly info - suffix: "_metaAsm.info" - data_object_type: Assembly Info File - description: "Assembly info for {id}" - - - Name: Metagenome Annotation - Type: nmdc:MetagenomeAnnotationActivity - Enabled: True - Git_repo: https://github.com/microbiomedata/mg_annotation - Version: v1.0.4 - WDL: annotation_full.wdl - Collection: metagenome_annotation_activity_set - Predecessors: - - Metagenome Assembly - Input_prefix: annotation - Inputs: - input_file: do:Assembly Contigs - imgap_project_id: "scaffold" - proj: "{activity_id}" - assembly_id: "{predecessor_activity_id}" - Activity: - name: "Metagenome Annotation Analysis Activity for {id}" - type: nmdc:MetagenomeAnnotationActivity - Outputs: - - output: proteins_faa - data_object_type: Annotation Amino Acid FASTA - description: FASTA Amino Acid File for {id} - name: FASTA amino acid file for annotated proteins - suffix: _proteins.faa - - output: structural_gff - data_object_type: Structural Annotation GFF - description: Structural Annotation for {id} - name: GFF3 format file with structural annotations - suffix: _structural_annotation.gff - - output: functional_gff - data_object_type: Functional Annotation GFF - description: Functional Annotation for {id} - name: GFF3 format file with functional 
annotations - suffix: _functional_annotation.gff - - output: ko_tsv - data_object_type: Annotation KEGG Orthology - description: KEGG Orthology for {id} - name: Tab delimited file for KO annotation - suffix: _ko.tsv - - output: ec_tsv - data_object_type: Annotation Enzyme Commission - description: EC Annotations for {id} - name: Tab delimited file for EC annotation - suffix: _ec.tsv - - output: cog_gff - data_object_type: Clusters of Orthologous Groups (COG) Annotation GFF - description: COGs for {id} - name: GFF3 format file with COGs - suffix: _cog.gff - - output: pfam_gff - data_object_type: Pfam Annotation GFF - description: Pfam Annotation for {id} - name: GFF3 format file with Pfam - suffix: _pfam.gff - - output: tigrfam_gff - data_object_type: TIGRFam Annotation GFF - description: TIGRFam for {id} - name: GFF3 format file with TIGRfam - suffix: _tigrfam.gff - - output: smart_gff - data_object_type: SMART Annotation GFF - description: SMART Annotations for {id} - name: GFF3 format file with SMART - suffix: _smart.gff - - output: supfam_gff - data_object_type: SUPERFam Annotation GFF - description: SUPERFam Annotations for {id} - name: GFF3 format file with SUPERFam - suffix: _supfam.gff - - output: cath_funfam_gff - data_object_type: CATH FunFams (Functional Families) Annotation GFF - description: CATH FunFams for {id} - name: GFF3 format file with CATH FunFams - suffix: _cath_funfam.gff - - output: crt_gff - data_object_type: CRT Annotation GFF - description: CRT Annotations for {id} - name: GFF3 format file with CRT - suffix: _crt.gff - - output: genemark_gff - data_object_type: Genemark Annotation GFF - description: Genemark Annotations for {id} - name: GFF3 format file with Genemark - suffix: _genemark.gff - - output: prodigal_gff - data_object_type: Prodigal Annotation GFF - description: Prodigal Annotations {id} - name: GFF3 format file with Prodigal - suffix: _prodigal.gff - - output: trna_gff - data_object_type: TRNA Annotation GFF - description: TRNA 
Annotations {id} - name: GFF3 format file with TRNA - suffix: _trna.gff - - output: final_rfam_gff - data_object_type: RFAM Annotation GFF - description: RFAM Annotations for {id} - name: GFF3 format file with RFAM - suffix: _rfam.gff - - output: ko_ec_gff - data_object_type: KO_EC Annotation GFF - description: KO_EC Annotations for {id} - name: GFF3 format file with KO_EC - suffix: _ko_ec.gff - - output: product_names_tsv - data_object_type: Product Names - description: Product names for {id} - name: Product names file - suffix: _product_names.tsv - - output: gene_phylogeny_tsv - data_object_type: Gene Phylogeny tsv - description: Gene Phylogeny for {id} - name: Gene Phylogeny file - suffix: _gene_phylogeny.tsv - - output: crt_crisprs - data_object_type: Crispr Terms - description: Crispr Terms for {id} - name: Crispr Terms - suffix: _crt.crisprs - - output: stats_tsv - data_object_type: Annotation Statistics - description: Annotation Stats for {id} - name: Annotation statistics report - suffix: _stats.tsv - - output: contig_mapping - data_object_type: Contig Mapping File - description: Contig mappings file for {id} - name: Contig mappings between contigs and scaffolds - suffix: _contig_names_mapping.tsv - optional: True - - output: imgap_version - data_object_type: Annotation Info File - description: Annotation info for {id} - name: File containing annotation info - suffix: _imgap.info - - output: renamed_fasta - data_object_type: Assembly Contigs - description: Assembly contigs (remapped) for {id} - name: File containing contigs with annotation headers - - - Name: MAGs - Type: nmdc:MagsAnalysisActivity - Enabled: True - Git_repo: https://github.com/microbiomedata/mg_annotation - Git_repo: https://github.com/microbiomedata/metaMAGs - Version: v1.0.6 - WDL: mbin_nmdc.wdl - Collection: mags_activity_set - Predecessors: - - Metagenome Annotation - Input_prefix: nmdc_mags - Inputs: - contig_file: do:Assembly Contigs - gff_file: do:Functional Annotation GFF - 
cath_funfam_file: do:CATH FunFams (Functional Families) Annotation GFF - supfam_file: do:SUPERFam Annotation GFF - cog_file: do:Clusters of Orthologous Groups (COG) Annotation GFF - proj_name: "{activity_id}" - pfam_file: do:Pfam Annotation GFF - product_names_file: do:Product Names - tigrfam_file: do:TIGRFam Annotation GFF - ec_file: do:Annotation Enzyme Commission - ko_file: do:Annotation KEGG Orthology - sam_file: do:Assembly Coverage BAM - smart_file: do:SMART Annotation GFF - proteins_file: do:Annotation Amino Acid FASTA - gene_phylogeny_file: do:Gene Phylogeny tsv - proj: "{activity_id}" - map_file: do:Annotation Mapping File - Optional Inputs: - - map_file - Activity: - name: "Metagenome Assembled Genomes Analysis Activity for {id}" - type: nmdc:MagsAnalysisActivity - Outputs: - - output: final_checkm - data_object_type: CheckM Statistics - description: CheckM for {id} - name: CheckM statistics report - suffix: _checkm_qa.out - - output: final_hqmq_bins_zip - data_object_type: Metagenome Bins - description: Metagenome Bins for {id} - name: Metagenome bin tarfiles archive - suffix: _hqmq_bin.zip - - output: final_gtdbtk_bac_summary - data_object_type: GTDBTK Bacterial Summary - description: Bacterial Summary for {id} - name: GTDBTK bacterial summary - suffix: _gtdbtk.bac122.summary.tsv - - output: final_gtdbtk_ar_summary - data_object_type: GTDBTK Archaeal Summary - description: Archaeal Summary for {id} - name: GTDBTK archaeal summary - suffix: _gtdbtk.ar122.summary.tsv - - output: mags_version - data_object_type: Metagenome Bins Info File - description: Metagenome Bins Info File for {id} - name: Metagenome Bins Info File - suffix: _bin.info - - - Name: Readbased Analysis - Type: nmdc:ReadBasedTaxonomyAnalysisActivity - Enabled: True - Git_repo: https://github.com/microbiomedata/ReadbasedAnalysis - Version: v1.0.5 - WDL: ReadbasedAnalysis.wdl - Collection: read_based_taxonomy_analysis_activity_set - Predecessors: - - Reads QC - - Reads QC Interleave - 
Input_prefix: ReadbasedAnalysis - Inputs: - input_file: do:Filtered Sequencing Reads - proj: "{activity_id}" - Activity: - name: "Readbased Taxonomy Analysis Activity for {id}" - type: nmdc:ReadBasedTaxonomyAnalysisActivity - Outputs: - - output: final_gottcha2_report_tsv - data_object_type: GOTTCHA2 Classification Report - description: GOTTCHA2 Classification for {id} - name: GOTTCHA2 classification report file - suffix: _gottcha2_report.tsv - - output: final_gottcha2_full_tsv - data_object_type: GOTTCHA2 Report Full - description: GOTTCHA2 Full Report for {id} - name: GOTTCHA2 report file - suffix: _gottcha2_full_tsv - - output: final_gottcha2_krona_html - data_object_type: GOTTCHA2 Krona Plot - description: GOTTCHA2 Krona for {id} - name: GOTTCHA2 krona plot HTML file - suffix: _gottcha2_krona.html - - output: final_centrifuge_classification_tsv - data_object_type: Centrifuge Taxonomic Classification - description: Centrifuge Classification for {id} - name: Centrifuge output read classification file - suffix: _centrifuge_classification.tsv - - output: final_centrifuge_report_tsv - data_object_type: Centrifuge output report file - description: Centrifuge Report for {id} - name: Centrifuge Classification Report - suffix: _centrifuge_report.tsv - - output: final_centrifuge_krona_html - data_object_type: Centrifuge Krona Plot - description: Centrifuge Krona for {id} - name: Centrifug krona plot HTML file - suffix: _centrifuge_krona.html - - output: final_kraken2_classification_tsv - data_object_type: Kraken2 Taxonomic Classification - description: Kraken2 Classification for {id} - name: Kraken2 output read classification file - suffix: _kraken2_classification.tsv - - output: final_kraken2_report_tsv - data_object_type: Kraken2 Classification Report - description: Kraken2 Report for {id} - name: Kraken2 output report file - suffix: _kraken2_report.tsv - - output: final_kraken2_krona_html - data_object_type: Kraken2 Krona Plot - description: Kraken2 Krona for {id} - 
name: Kraken2 Krona plot HTML file - suffix: _kraken2_krona.html - - output: info_file - data_object_type: Read Based Analysis Info File - description: Read based analysis info for {id} - name: File containing reads based analysis information - suffix: profiler.info diff --git a/tests/workflows_test2.yaml b/tests/workflows_test2.yaml deleted file mode 100644 index 5dbf542c..00000000 --- a/tests/workflows_test2.yaml +++ /dev/null @@ -1,398 +0,0 @@ -Workflows: - - Name: Sequencing Noninterleaved - Collection: omics_processing_set - Filter Output Objects: - - Metagenome Raw Read 1 - - Metagenome Raw Read 2 - - - Name: Sequencing Interleaved - Collection: omics_processing_set - Filter Output Objects: - - Metagenome Raw Reads - - - Name: Sequencing - Type: nmdc:MetagenomeSequencing - Enabled: False - Git_repo: https://github.com/microbiomedata/RawSequencingData - Version: v1.0.0 - Collection: metagenome_sequencing_activity_set - - - Name: Reads QC - Type: nmdc:ReadQcAnalysisActivity - Enabled: True - Git_repo: https://github.com/microbiomedata/ReadsQC - Version: v1.0.7 - WDL: rqcfilter.wdl - Collection: read_qc_analysis_activity_set - Filter Input Objects: - - Metagenome Raw Reads - Predecessors: - - Sequencing - - Sequencing Interleaved - Input_prefix: nmdc_rqcfilter - Inputs: - input_files: do:Metagenome Raw Reads - proj: "{activity_id}" - Activity: - name: "Read QC Activity for {id}" - input_read_bases: "{outputs.stats.input_read_bases}" - input_read_count: "{outputs.stats.input_read_count}" - output_read_bases: "{outputs.stats.output_read_bases}" - output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity - Outputs: - - output: filtered_final - name: Reads QC result fastq (clean data) - data_object_type: Filtered Sequencing Reads - description: "Reads QC for {id}" - - output: filtered_stats_final - name: Reads QC summary statistics - data_object_type: QC Statistics - description: "Reads QC summary for {id}" - - - Name: Reads QC - 
Type: nmdc:ReadQcAnalysisActivity - Enabled: True - Git_repo: https://github.com/microbiomedata/ReadsQC - Version: v1.1.8 - WDL: rqcfilter.wdl - Collection: read_qc_analysis_activity_set - Filter Input Objects: - - Metagenome Raw Reads - Predecessors: - - Sequencing - - Sequencing Interleaved - Input_prefix: nmdc_rqcfilter - Inputs: - input_files: do:Metagenome Raw Reads - proj: "{activity_id}" - Activity: - name: "Read QC Activity for {id}" - input_read_bases: "{outputs.stats.input_read_bases}" - input_read_count: "{outputs.stats.input_read_count}" - output_read_bases: "{outputs.stats.output_read_bases}" - output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity - Outputs: - - output: filtered_final - name: Reads QC result fastq (clean data) - data_object_type: Filtered Sequencing Reads - description: "Reads QC for {id}" - - output: filtered_stats_final - name: Reads QC summary statistics - data_object_type: QC Statistics - description: "Reads QC summary for {id}" - - - Name: ReadsQC Interleave - Type: nmdc:ReadQcAnalysisActivity - Enabled: True - Git_repo: https://github.com/microbiomedata/ReadsQC - Version: v1.0.7 - Collection: read_qc_analysis_activity_set - WDL: make_interleave_reads.wdl - Input_prefix: make_interleaved_reads - Inputs: - proj: "{activity_id}" - input_file_1: do:Metagenome Raw Read 1 - input_file_2: do:Metagenome Raw Read 2 - Filter Input Objects: - - Metagenome Raw Read 1 - - Metagenome Raw Read 2 - Predecessors: - - Sequencing Noninterleaved - Input_prefix: nmdc_rqcfilter - Activity: - name: "Read QC Activity for {id}" - input_read_bases: "{outputs.stats.input_read_bases}" - input_read_count: "{outputs.stats.input_read_count}" - output_read_bases: "{outputs.stats.output_read_bases}" - output_read_count: "{outputs.stats.output_read_count}" - type: nmdc:ReadQcAnalysisActivity - Outputs: - - output: filtered_final - name: Reads QC result fastq (clean data) - data_object_type: Filtered Sequencing Reads - 
description: "Reads QC for {id}" - - output: filtered_stats_final - name: Reads QC summary statistics - data_object_type: QC Statistics - description: "Reads QC summary for {id}" - - output: rqc_info - name: File containing read filtering information - data_object_type: Read Filtering Info File - description: "Read filtering info for {id}" - - - Name: Metagenome Assembly - Type: nmdc:MetagenomeAssembly - Enabled: True - Git_repo: https://github.com/microbiomedata/metaAssembly - Version: v1.0.3 - WDL: jgi_assembly.wdl - Collection: metagenome_assembly_set - Predecessors: - - Reads QC - - Reads QC Interleave - Input_prefix: jgi_metaASM - Inputs: - input_file: do:Filtered Sequencing Reads - rename_contig_prefix: "{activity_id}" - proj: "{activity_id}" - Activity: - name: "Metagenome Assembly Activity for {id}" - type: nmdc:MetagenomeAssembly - asm_score: "{outputs.stats.asm_score}" - contig_bp: "{outputs.stats.contig_bp}" - contigs: "{outputs.stats.contigs}" - ctg_l50: "{outputs.stats.ctg_l50}" - ctg_l90: "{outputs.stats.ctg_l90}" - ctg_logsum: "{outputs.stats.ctg_logsum}" - ctg_max: "{outputs.stats.ctg_max}" - ctg_n50: "{outputs.stats.ctg_n50}" - ctg_n90: "{outputs.stats.ctg_n90}" - ctg_powsum: "{outputs.stats.ctg_powsum}" - gap_pct: "{outputs.stats.gap_pct}" - gc_avg: "{outputs.stats.gc_avg}" - gc_std: "{outputs.stats.gc_std}" - scaf_bp: "{outputs.stats.scaf_bp}" - scaf_l50: "{outputs.stats.scaf_l50}" - scaf_l90: "{outputs.stats.scaf_l90}" - scaf_l_gt50k: "{outputs.stats.scaf_l_gt50k}" - scaf_logsum: "{outputs.stats.scaf_logsum}" - scaf_max: "{outputs.stats.scaf_max}" - scaf_n50: "{outputs.stats.scaf_n50}" - scaf_n90: "{outputs.stats.scaf_n90}" - scaf_n_gt50k: "{outputs.stats.scaf_n_gt50k}" - scaf_pct_gt50k: "{outputs.stats.scaf_pct_gt50k}" - scaf_powsum: "{outputs.stats.scaf_powsum}" - scaffolds: "{outputs.stats.scaffolds}" - Outputs: - - output: contig - name: Final assembly contigs fasta - data_object_type: Assembly Contigs - description: "Assembly contigs for 
{id}" - - output: scaffold - name: Final assembly scaffolds fasta - data_object_type: Assembly Scaffolds - description: "Assembly scaffolds for {id}" - - output: covstats - name: Assembled contigs coverage information - data_object_type: Assembly Coverage Stats - description: "Coverage Stats for {id}" - - output: agp - name: An AGP format file that describes the assembly - data_object_type: Assembly AGP - description: "AGP for {id}" - - output: bam - name: Sorted bam file of reads mapping back to the final assembly - data_object_type: Assembly Coverage BAM - description: "Sorted Bam for {id}" - - - Name: Metagenome Annotation - Type: nmdc:MetagenomeAnnotationActivity - Enabled: True - Git_repo: https://github.com/microbiomedata/mg_annotation - Version: v1.0.4 - WDL: annotation_full.wdl - Collection: metagenome_annotation_activity_set - Predecessors: - - Metagenome Assembly - Input_prefix: annotation - Inputs: - input_file: do:Assembly Contigs - imgap_project_id: "scaffold" - proj: "{activity_id}" - Activity: - name: "Metagenome Annotation Analysis Activity for {id}" - type: nmdc:MetagenomeAnnotationActivity - Outputs: - - output: proteins_faa - data_object_type: Annotation Amino Acid FASTA - description: FASTA Amino Acid File for {id} - name: FASTA amino acid file for annotated proteins - - output: structural_gff - data_object_type: Structural Annotation GFF - description: Structural Annotation for {id} - name: GFF3 format file with structural annotations - - output: functional_gff - data_object_type: Functional Annotation GFF - description: Functional Annotation for {id} - name: GFF3 format file with functional annotations - - output: ko_tsv - data_object_type: Annotation KEGG Orthology - description: KEGG Orthology for {id} - name: Tab delimited file for KO annotation - - output: ec_tsv - data_object_type: Annotation Enzyme Commission - description: EC Annotations for {id} - name: Tab delimited file for EC annotation - - output: cog_gff - data_object_type: 
Clusters of Orthologous Groups (COG) Annotation GFF - description: COGs for {id} - name: GFF3 format file with COGs - - output: pfam_gff - data_object_type: Pfam Annotation GFF - description: Pfam Annotation for {id} - name: GFF3 format file with Pfam - - output: tigrfam_gff - data_object_type: TIGRFam Annotation GFF - description: TIGRFam for {id} - name: GFF3 format file with TIGRfam - - output: smart_gff - data_object_type: SMART Annotation GFF - description: SMART Annotations for {id} - name: GFF3 format file with SMART - - output: supfam_gff - data_object_type: SUPERFam Annotation GFF - description: SUPERFam Annotations for {id} - name: GFF3 format file with SUPERFam - - output: cath_funfam_gff - data_object_type: CATH FunFams (Functional Families) Annotation GFF - description: CATH FunFams for {id} - name: GFF3 format file with CATH FunFams - - output: crt_gff - data_object_type: CRT Annotation GFF - description: CRT Annotations for {id} - name: GFF3 format file with CRT - - output: genemark_gff - data_object_type: Genmark Annotation GFF - description: Genemark Annotations for {id} - name: GFF3 format file with Genemark - - output: prodigal_gff - data_object_type: Prodigal Annotation GFF - description: Prodigal Annotations {id} - name: GFF3 format file with Prodigal - - output: trna_gff - data_object_type: TRNA Annotation GFF - description: TRNA Annotations {id} - name: GFF3 format file with TRNA - - output: final_rfam_gff - data_object_type: RFAM Annotation GFF - description: RFAM Annotations for {id} - name: GFF3 format file with RFAM - - output: ko_ec_gff - data_object_type: KO_EC Annotation GFF - description: KO_EC Annotations for {id} - name: GFF3 format file with KO_EC - - output: product_names_tsv - data_object_type: Product Names - description: Product names for {id} - name: Product names file - - output: gene_phylogeny_tsv - data_object_type: Gene Phylogeny tsv - description: Gene Phylogeny for {id} - name: Gene Phylogeny file - - output: crt_crisprs 
- data_object_type: Crisprt Terms - description: Crispr Terms for {id} - name: Crispr Terms - - output: stats_tsv - data_object_type: Annotation Statistics - description: Annotation Stats for {id} - name: Annotation statistics report - - - Name: MAGs - Type: nmdc:MAGsAnalysisActivity - Enabled: True - Git_repo: https://github.com/microbiomedata/mg_annotation - Git_repo: https://github.com/microbiomedata/metaMAGs - Version: v1.0.6 - WDL: mbin_nmdc.wdl - Collection: mags_activity_set - Predecessors: - - Metagenome Annotation - Input_prefix: nmdc_mags - Inputs: - contig_file: do:Assembly Contigs - gff_file: do:Functional Annotation GFF - cath_funfam_file: do:CATH FunFams (Functional Families) Annotation GFF - supfam_file: do:SUPERFam Annotation GFF - cog_file: do:Clusters of Orthologous Groups (COG) Annotation GFF - proj_name: "{activity_id}" - pfam_file: do:Pfam Annotation GFF - product_names_file: do:Product Names - tigrfam_file: do:TIGRFam Annotation GFF - ec_file: do:Annotation Enzyme Commission - ko_file: do:Annotation KEGG Orthology - sam_file: do:Assembly Coverage BAM - smart_file: do:SMART Annotation GFF - proteins_file: do:Annotation Amino Acid FASTA - gene_phylogeny_file: do:Gene Phylogeny tsv - proj: "{activity_id}" - map_file: do:Annotation Mapping File - Optional Inputs: - - map_file - Activity: - name: "Metagenome Assembled Genomes Analysis Activity for {id}" - type: nmdc:MAGsAnalysisActivity - Outputs: - - output: final_checkm - data_object_type: CheckM Statistics - description: CheckM for {id} - name: CheckM statistics report - - output: final_hqmq_bins_zip - data_object_type: Metagenome Bins - description: Metagenome Bins for {id} - name: Metagenome bin tarfiles archive - - output: final_gtdbtk_bac_summary - data_object_type: GTDBTK Bacterial Summary - description: Bacterial Summary for {id} - name: GTDBTK bacterial summary - - output: final_gtdbtk_ar_summary - data_object_type: GTDBTK Archaeal Summary - description: Archaeal Summary for {id} - name: 
GTDBTK archaeal summary - - - Name: Readbased Analysis - Type: nmdc:ReadBasedTaxonomyAnalysisActivity - Enabled: True - Git_repo: https://github.com/microbiomedata/ReadbasedAnalysis - Version: v1.0.5 - WDL: ReadbasedAnalysis.wdl - Collection: read_based_taxonomy_analysis_activity_set - Predecessors: - - Reads QC - Input_prefix: ReadbasedAnalysis - Inputs: - input_file: do:Filtered Sequencing Reads - proj: "{activity_id}" - Activity: - name: "Readbased Taxonomy Analysis Activity for {id}" - type: nmdc:ReadBasedTaxonomyAnalysisActivity - Outputs: - - output: final_gottcha2_report_tsv - data_object_type: GOTTCHA2 Classification Report - description: GOTTCHA2 Classification for {id} - name: GOTTCHA2 classification report file - - output: final_gottcha2_full_tsv - data_object_type: GOTTCHA2 Report Full - description: GOTTCHA2 Full Report for {id} - name: GOTTCHA2 report file - - output: final_gottcha2_krona_html - data_object_type: GOTTCHA2 Krona Plot - description: GOTTCHA2 Krona for {id} - name: GOTTCHA2 krona plot HTML file - - output: final_centrifuge_classification_tsv - data_object_type: Centrifuge Taxonomic Classification - description: Centrifuge Classification for {id} - name: Centrifuge output read classification file - - output: final_centrifuge_report_tsv - data_object_type: Centrifuge output report file - description: Centrifuge Report for {id} - name: Centrifuge Classification Report - - output: final_centrifuge_krona_html - data_object_type: Centrifuge Krona Plot - description: Centrifuge Krona for {id} - name: Centrifug krona plot HTML file - - output: final_kraken2_classification_tsv - data_object_type: Kraken2 Taxonomic Classification - description: Kraken2 Classification for {id} - name: Kraken2 output read classification file - - output: final_kraken2_report_tsv - data_object_type: Kraken2 Classification Report - description: Kraken2 Report for {id} - name: Kraken2 output report file - - output: final_kraken2_krona_html - data_object_type: Kraken2 
Krona Plot - description: Kraken2 Krona for {id} - name: Kraken2 Krona plot HTML file -