diff --git a/.gitignore b/.gitignore index 4c54769e..4c454286 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ attic private configs/.local_* attic/data/dryrun_data/ +nmdc_automation/workflow_automation/_state/*.state +nmdc_automation/workflow_automation/_state/*.json # Ignore `coverage.xml` file in this directory. /coverage.xml diff --git a/configs/import.yaml b/configs/import.yaml index f6f26a52..787bf0ea 100644 --- a/configs/import.yaml +++ b/configs/import.yaml @@ -126,7 +126,7 @@ Workflows: Import: false Type: nmdc:MagsAnalysis Git_repo: https://github.com/microbiomedata/metaMAGs - Version: v1.0.6 + Version: v1.3.11 Collection: workflow_execution_set WorkflowExecutionRange: MagsAnalysis Inputs: diff --git a/nmdc_automation/__init__.py b/nmdc_automation/__init__.py index 4e82f8dd..0dd591c9 100644 --- a/nmdc_automation/__init__.py +++ b/nmdc_automation/__init__.py @@ -1,4 +1,4 @@ from .api import nmdcapi -from .config import config +from .config import siteconfig from .import_automation import activity_mapper from .workflow_automation import watch_nmdc, wfutils, workflows, workflow_process diff --git a/nmdc_automation/api/jawsapi.py b/nmdc_automation/api/jawsapi.py index 67e29777..0293893b 100644 --- a/nmdc_automation/api/jawsapi.py +++ b/nmdc_automation/api/jawsapi.py @@ -2,7 +2,7 @@ import requests import uuid -from nmdc_automation.config import Config +from nmdc_automation.config import SiteConfig _base_url = "http://jaws.lbl.gov:5003/api/v2" _base_in = "/pscratch/sd/n/nmjaws/nmdc-prod/inputs" @@ -14,7 +14,7 @@ class JawsApi: def __init__(self, site_configuration): - self.config = Config(site_configuration) + self.config = SiteConfig(site_configuration) self._base_url = self.config.api_url self.client_id = self.config.client_id self.client_secret = self.config.client_secret diff --git a/nmdc_automation/api/nmdcapi.py b/nmdc_automation/api/nmdcapi.py index 87c4a232..15efb92c 100755 --- a/nmdc_automation/api/nmdcapi.py +++ b/nmdc_automation/api/nmdcapi.py @@ -12,7 +12,7 @@ from time import time from typing import Union, List from datetime import datetime, timedelta, timezone -from nmdc_automation.config import Config, UserConfig +from nmdc_automation.config import SiteConfig, UserConfig import logging @@ -50,9 +50,9 @@ class NmdcRuntimeApi: client_id = None client_secret = None - def __init__(self, site_configuration: Union[str, Path, Config]): + def __init__(self, site_configuration: Union[str, Path, SiteConfig]): if isinstance(site_configuration, str) or isinstance(site_configuration, Path): - site_configuration = Config(site_configuration) + site_configuration = SiteConfig(site_configuration) self.config = site_configuration self._base_url = self.config.api_url self.client_id = self.config.client_id @@ -211,6 +211,7 @@ def bump_time(self, obj): resp = requests.patch(url, headers=self.header, data=json.dumps(d)) return resp.json() + # TODO test that this concatenates multi-page results @refresh_token def list_jobs(self, filt=None, max=100) -> List[dict]: url = "%sjobs?max_page_size=%s" % (self._base_url, max) diff --git a/nmdc_automation/config/__init__.py b/nmdc_automation/config/__init__.py index aaee0ab5..9f38ee32 100644 --- a/nmdc_automation/config/__init__.py +++ b/nmdc_automation/config/__init__.py @@ -1 +1 @@ -from .config import Config, UserConfig +from .siteconfig import SiteConfig, UserConfig diff --git a/nmdc_automation/config/config.py b/nmdc_automation/config/siteconfig.py similarity index 90% rename from nmdc_automation/config/config.py rename to nmdc_automation/config/siteconfig.py index f6ef4d4e..a681d8fa 100644 --- a/nmdc_automation/config/config.py +++ b/nmdc_automation/config/siteconfig.py @@ -1,14 +1,18 @@ -from pathlib import Path import tomli from typing import Union import yaml -import os from pathlib import Path +import warnings WORKFLOWS_DIR = Path(__file__).parent / "workflows" class UserConfig: def __init__(self, path): + warnings.warn( + "UserConfig is deprecated and will be removed in a future release. Use SiteConfig instead.", + DeprecationWarning, + stacklevel=2, + ) with open(path, "rb") as file: self.config_data = tomli.load(file) @@ -24,7 +28,7 @@ def username(self): def password(self): return self.config_data["api"]["password"] -class Config: +class SiteConfig: def __init__(self, path: Union[str, Path]): with open(path, "rb") as file: self.config_data = tomli.load(file) @@ -75,7 +79,7 @@ def watch_state(self): @property def agent_state(self): - return self.config_data["state"]["agent_state"] + return self.config_data.get("state", {}).get("agent_state", None) @property def activity_id_state(self): diff --git a/nmdc_automation/config/workflows/workflows.yaml b/nmdc_automation/config/workflows/workflows.yaml index 755ea8cc..50dc4d8b 100644 --- a/nmdc_automation/config/workflows/workflows.yaml +++ b/nmdc_automation/config/workflows/workflows.yaml @@ -288,7 +288,7 @@ Workflows: Enabled: True Analyte Category: Metagenome Git_repo: https://github.com/microbiomedata/metaMAGs - Version: v1.3.10 + Version: v1.3.11 WDL: mbin_nmdc.wdl Collection: workflow_execution_set Predecessors: diff --git a/nmdc_automation/run_process/run_workflows.py b/nmdc_automation/run_process/run_workflows.py index 8db01bbf..401fa2a9 100644 --- a/nmdc_automation/run_process/run_workflows.py +++ b/nmdc_automation/run_process/run_workflows.py @@ -65,7 +65,7 @@ def resubmit(ctx, activity_ids): else: key = "activity_id" for found_job in watcher.jobs: - job_record = found_job.get_state() + job_record = found_job.state() if job_record[key] == act_id: job = found_job break diff --git a/nmdc_automation/workflow_automation/__init__.py b/nmdc_automation/workflow_automation/__init__.py index 94efbd33..2fb3edc5 100644 --- a/nmdc_automation/workflow_automation/__init__.py +++ b/nmdc_automation/workflow_automation/__init__.py @@ -1,3 +1,2 @@ from .watch_nmdc import Watcher from .workflows import load_workflow_configs -from .wfutils import WorkflowJob, NmdcSchema diff --git a/nmdc_automation/workflow_automation/models.py b/nmdc_automation/workflow_automation/models.py index 610a81af..0db245ab 100644 --- a/nmdc_automation/workflow_automation/models.py +++ b/nmdc_automation/workflow_automation/models.py @@ -5,6 +5,7 @@ from typing import List, Dict, Any, Optional, Set, Union from nmdc_schema.nmdc import ( + DataGeneration, FileTypeEnum, NucleotideSequencing, MagsAnalysis, @@ -13,7 +14,6 @@ MetatranscriptomeAssembly, MetatranscriptomeAnnotation, MetatranscriptomeExpressionAnalysis, - PlannedProcess, ReadBasedTaxonomyAnalysis, ReadQcAnalysis, WorkflowExecution @@ -21,7 +21,7 @@ from nmdc_schema import nmdc -def workflow_process_factory(record: Dict[str, Any]) -> PlannedProcess: +def workflow_process_factory(record: Dict[str, Any]) -> Union[DataGeneration, WorkflowExecution]: """ Factory function to create a PlannedProcess subclass object from a record. Subclasses are determined by the "type" field in the record, and can be @@ -38,12 +38,53 @@ def workflow_process_factory(record: Dict[str, Any]) -> PlannedProcess: "nmdc:ReadBasedTaxonomyAnalysis": ReadBasedTaxonomyAnalysis, "nmdc:ReadQcAnalysis": ReadQcAnalysis, } - record.pop("_id", None) + record = _normalize_record(record) + try: cls = process_types[record["type"]] except KeyError: raise ValueError(f"Invalid workflow execution type: {record['type']}") - return cls(**record) + wfe = cls(**record) + return wfe + +def _normalize_record(record: Dict[str, Any]) -> Dict[str, Any]: + """ Normalize the record by removing the _id field and converting the type field to a string """ + record.pop("_id", None) + # for backwards compatibility strip Activity from the end of the type + record["type"] = record["type"].replace("Activity", "") + normalized_record = _strip_empty_values(record) + + # type-specific normalization + if normalized_record["type"] == "nmdc:MagsAnalysis": + normalized_record = _normalize_mags_record(normalized_record) + + return normalized_record + +def _normalize_mags_record(record: Dict[str, Any]) -> Dict[str, Any]: + """ Normalize the record for a MagsAnalysis object """ + for i, mag in enumerate(record.get("mags_list", [])): + if not mag.get("type"): + # Update the original dictionary in the list + record["mags_list"][i]["type"] = "nmdc:MagBin" + # for backwards compatibility normalize num_tRNA to num_t_rna + if "num_tRNA" in mag: + record["mags_list"][i]["num_t_rna"] = mag.pop("num_tRNA") + # add type to eukaryotic_evaluation if it exists + if "eukaryotic_evaluation" in mag: + record["mags_list"][i]["eukaryotic_evaluation"]["type"] = "nmdc:EukEval" + return record + + +def _strip_empty_values(d: Dict[str, Any]) -> Dict[str, Any]: + """ Strip empty values from a record """ + empty_values = [None, "", [], "null", 0] + def clean_dict(d): + if isinstance(d, dict): + return {k: clean_dict(v) for k, v in d.items() if v not in empty_values} + elif isinstance(d, list): + return [clean_dict(v) for v in d if v not in empty_values] + return d + return clean_dict(d) class WorkflowProcessNode(object): @@ -203,7 +244,6 @@ def add_parent(self, parent: "WorkflowConfig"): self.parents.add(parent) - @dataclass class JobWorkflow: id: str diff --git a/nmdc_automation/workflow_automation/watch_nmdc.py b/nmdc_automation/workflow_automation/watch_nmdc.py index 457761df..0096310c 100644 --- a/nmdc_automation/workflow_automation/watch_nmdc.py +++ b/nmdc_automation/workflow_automation/watch_nmdc.py @@ -6,224 +6,219 @@ import logging import shutil from json import loads -from os.path import exists -from typing import List, Dict, Any, Optional, Set +from pathlib import Path +from typing import List, Dict, Any, Optional, Union, Tuple +from nmdc_schema.nmdc import Database from nmdc_automation.api import NmdcRuntimeApi -from nmdc_automation.config import Config +from nmdc_automation.config import SiteConfig from .wfutils import WorkflowJob -from .wfutils import NmdcSchema, _md5 +from .wfutils import _md5 + +DEFAULT_STATE_DIR = Path(__file__).parent / "_state" +DEFAULT_STATE_FILE = DEFAULT_STATE_DIR / "state.json" +INITIAL_STATE = {"jobs": []} logger = logging.getLogger(__name__) class FileHandler: - def __init__(self, config: Config): + """ FileHandler class for managing state and metadata files """ + def __init__(self, config: SiteConfig, state_file: Union[str, Path] = None): + """ Initialize the FileHandler, with a Config object and an optional state file path """ self.config = config - self.state_file = self.config.agent_state - - def load_state_file(self)-> Optional[Dict[str, Any]]: - if not exists(self.state_file): - return None + self._state_file = None + # set state file + if state_file: + self._state_file = Path(state_file) + elif self.config.agent_state: + self._state_file = Path(self.config.agent_state) + else: + # no state file provided or set in config set up a default + # check for a default state directory and create if it doesn't exist + DEFAULT_STATE_DIR.mkdir(parents=True, exist_ok=True) + DEFAULT_STATE_FILE.touch(exist_ok=True) + # if the file is empty write the initial state + if DEFAULT_STATE_FILE.stat().st_size == 0: + with open(DEFAULT_STATE_FILE, "w") as f: + json.dump(INITIAL_STATE, f, indent=2) + self._state_file = DEFAULT_STATE_FILE + + @property + def state_file(self) -> Path: + """ Get the state file path """ + return self._state_file + + @state_file.setter + def state_file(self, value) -> None: + """ Set the state file path """ + self._state_file = value + + def read_state(self)-> Optional[Dict[str, Any]]: + """ Read the state file and return the data """ with open(self.state_file, "r") as f: - return loads(f.read()) - - def save_state_file(self, data): + state = loads(f.read()) + return state + + def write_state(self, data) -> None: + """ Write data to the state file """ + # normalize "id" used in database job records to "nmdc_jobid" + for job in data["jobs"]: + if "id" in job: + job["nmdc_jobid"] = job.pop("id") with open(self.state_file, "w") as f: json.dump(data, f, indent=2) - def get_output_dir(self, job): - data_directory = self.config.data_dir - informed_by = job.workflow_config["was_informed_by"] - workflow_execution_id = job.activity_id - outdir = os.path.join(data_directory, informed_by, workflow_execution_id) - if not os.path.exists(outdir): - os.makedirs(outdir) - return outdir - - def write_metadata_if_not_exists(self, job, outdir): - metadata_filepath = os.path.join(outdir, "metadata.json") - if not os.path.exists(metadata_filepath): + def get_output_path(self, job: WorkflowJob) -> Path: + """ Get the output path for a job """ + # construct path from string components + output_path = Path(self.config.data_dir) / job.was_informed_by / job.workflow_execution_id + return output_path + + def write_metadata_if_not_exists(self, job: WorkflowJob)->Path: + """ Write metadata to a file if it doesn't exist """ + metadata_filepath = self.get_output_path(job) / "metadata.json" + # make sure the parent directories exist + metadata_filepath.parent.mkdir(parents=True, exist_ok=True) + if not metadata_filepath.exists(): with open(metadata_filepath, "w") as f: - json.dump(job.get_metadata(), f) + json.dump(job.job.metadata, f) + return metadata_filepath class JobManager: - def __init__(self, config, file_handler, api_handler): + """ JobManager class for managing WorkflowJob objects """ + def __init__(self, config: SiteConfig, file_handler: FileHandler, init_cache: bool = True): + """ Initialize the JobManager with a Config object and a FileHandler object """ self.config = config self.file_handler = file_handler - self.api_handler = api_handler - self.job_cache = [] + self._job_cache = [] self._MAX_FAILS = 2 + if init_cache: + self.restore_from_state() + + + @property + def job_cache(self)-> List[WorkflowJob]: + """ Get the job cache """ + return self._job_cache + + @job_cache.setter + def job_cache(self, value) -> None: + """ Set the job cache """ + self._job_cache = value + + def job_checkpoint(self) -> Dict[str, Any]: + """ Get the state data for all jobs """ + jobs = [wfjob.workflow.state for wfjob in self.job_cache] + data = {"jobs": jobs} + return data + + def save_checkpoint(self) -> None: + """ Save jobs to state data """ + data = self.job_checkpoint() + self.file_handler.write_state(data) - def restore_jobs(self, state_data: Dict[str, Any], nocheck=False)-> None: + def restore_from_state(self)-> None: """ Restore jobs from state data """ - self.job_cache = self._find_jobs(state_data, nocheck) + self.job_cache = self.get_workflow_jobs_from_state() - def _find_jobs(self, state_data: dict, nocheck: bool)-> List[WorkflowJob]: + def get_workflow_jobs_from_state(self)-> List[WorkflowJob]: """ Find jobs from state data """ - new_wf_job_list = [] - seen = {} - for job in state_data["jobs"]: - job_id = job["nmdc_jobid"] - if job_id in seen: + wf_job_list = [] + job_cache_ids = [job.opid for job in self.job_cache] + state = self.file_handler.read_state() + for job in state["jobs"]: + if job.get("opid") in job_cache_ids: continue - wf_job = WorkflowJob(self.config, state=job, nocheck=nocheck) - new_wf_job_list.append(wf_job) - seen[job_id] = True - return new_wf_job_list - - def _get_url(self, informed_by, act_id, fname): - root = self.config.url_root - return f"{root}/{informed_by}/{act_id}/{fname}" - - def _get_output_dir(self, informed_by, act_id): - data_directory = self.config.data_dir - outdir = os.path.join(data_directory, informed_by, act_id) - if not os.path.exists(outdir): - os.makedirs(outdir) - return outdir - - def find_job_by_opid(self, opid): + wf_job = WorkflowJob(self.config, workflow_state=job) + job_cache_ids.append(wf_job.opid) + wf_job_list.append(wf_job) + return wf_job_list + + + def find_job_by_opid(self, opid) -> Optional[WorkflowJob]: + """ Find a job by operation id """ return next((job for job in self.job_cache if job.opid == opid), None) - def submit_job(self, new_job, opid, force=False): - common_workflow_id = new_job["workflow"]["id"] - if "object_id_latest" in new_job["config"]: + + def prepare_and_cache_new_job(self, new_job: WorkflowJob, opid: str, force=False)-> Optional[WorkflowJob]: + """ Prepare and cache a new job """ + if "object_id_latest" in new_job.workflow.config: logger.warning("Old record. Skipping.") return - wf_job = self.get_or_create_workflow_job(new_job, opid, common_workflow_id) - self.job_cache.append(wf_job) - wf_job.cromwell_submit(force=force) - - def get_or_create_workflow_job(self, new_job, opid, common_workflow_id)-> WorkflowJob: - wf_job = self.find_job_by_opid(opid) - if not wf_job: - wf_job = WorkflowJob( - site_config=self.config, - type=common_workflow_id, - nmdc_jobid=new_job["id"], - workflow_config=new_job["config"], - opid=opid, - activity_id=new_job["config"]["activity_id"], - ) - return wf_job - - def check_job_status(self): + existing_job = self.find_job_by_opid(opid) + if not existing_job: + new_job.set_opid(opid, force=force) + self.job_cache.append(new_job) + return new_job + elif force: + self.job_cache.remove(existing_job) + new_job.set_opid(opid, force=force) + self.job_cache.append(new_job) + return new_job + + + def get_finished_jobs(self)->Tuple[List[WorkflowJob], List[WorkflowJob]]: + """ Get finished jobs """ + successful_jobs = [] + failed_jobs = [] for job in self.job_cache: - if not job.done: - status = job.check_status() + if job.done: + status = job.job_status if status == "Succeeded" and job.opid: - self.process_successful_job(job) + successful_jobs.append(job) elif status == "Failed" and job.opid: - self.process_failed_job(job) + failed_jobs.append(job) + return (successful_jobs, failed_jobs) - def process_successful_job(self, job: WorkflowJob): - logger.info(f"Running post for op {job.opid}") - outdir = self.file_handler.get_output_dir(job) - schema = NmdcSchema() + def process_successful_job(self, job: WorkflowJob) -> Database: + """ Process a successful job """ + logger.info(f"Running post for op {job.opid}") - output_ids = self.generate_data_objects(job, outdir, schema) + output_path = self.file_handler.get_output_path(job) + if not output_path.exists(): + output_path.mkdir(parents=True, exist_ok=True) - self.create_activity_record(job, output_ids, schema) + database = Database() - self.file_handler.write_metadata_if_not_exists(job, outdir) + data_objects = job.make_data_objects(output_dir=output_path) + database.data_object_set = data_objects + workflow_execution_record = job.make_workflow_execution_record(data_objects) + database.workflow_execution_set = [workflow_execution_record] - nmdc_database_obj = schema.get_database_object_dump() - nmdc_database_obj_dict = json.loads(nmdc_database_obj) - resp = self.api_handler.post_objects(nmdc_database_obj_dict) - logger.info(f"Response: {resp}") - job.done = True - resp = self.api_handler.update_op( - job.opid, done=True, meta=job.get_metadata() - ) - return resp + self.file_handler.write_metadata_if_not_exists(job) + return database - def process_failed_job(self, job): + def process_failed_job(self, job) -> None: + """ Process a failed job """ if job.failed_count < self._MAX_FAILS: job.failed_count += 1 job.cromwell_submit() - def job_checkpoint(self): - jobs = [job.get_state() for job in self.job_cache] - data = {"jobs": jobs} - return data - - def generate_data_objects(self, job, outdir, schema): - output_ids = [] - prefix = job.workflow_config["input_prefix"] - - job_outs = job.get_metadata()["outputs"] - informed_by = job.workflow_config["was_informed_by"] - - for product_record in job.outputs: - outkey = f"{prefix}.{product_record['output']}" - if outkey not in job_outs and product_record.get("optional"): - logging.debug(f"Ignoring optional missing output {outkey}") - continue - - full_name = job_outs[outkey] - file_name = os.path.basename(full_name) - new_path = os.path.join(outdir, file_name) - shutil.copyfile(full_name, new_path) - - md5 = _md5(full_name) - file_url = self._get_url( - job.workflow_config["was_informed_by"], - job.activity_id, - file_name - ) - id = product_record["id"] - schema.make_data_object( - name=file_name, - full_file_name=full_name, - file_url=file_url, - data_object_type=product_record["data_object_type"], - dobj_id=product_record["id"], - md5_sum=md5, - description=product_record["description"], - omics_id=job.activity_id, - ) - - output_ids.append(id) - - return output_ids - - def create_activity_record(self, job, output_ids, schema): - activity_type = job.activity_templ["type"] - name = job.activity_templ["name"].replace("{id}", job.activity_id) - omic_id = job.workflow_config["was_informed_by"] - resource = self.config.resource - schema.create_activity_record( - activity_record=activity_type, - activity_name=name, - workflow=job.workflow_config, - activity_id=job.activity_id, - resource=resource, - has_inputs_list=[dobj["id"] for dobj in job.input_data_objects], - has_output_list=output_ids, - omic_id=omic_id, - start_time=job.start, - end_time=job.end, - ) - class RuntimeApiHandler: def __init__(self, config): self.runtime_api = NmdcRuntimeApi(config) + self.config = config def claim_job(self, job_id): return self.runtime_api.claim_job(job_id) - def list_jobs(self, allowed_workflows)-> List[Dict[str, Any]]: + def get_unclaimed_jobs(self, allowed_workflows)-> List[WorkflowJob]: + jobs = [] filt = { "workflow.id": {"$in": allowed_workflows}, "claims": {"$size": 0} } job_records = self.runtime_api.list_jobs(filt=filt) - return job_records + + for job in job_records: + jobs.append(WorkflowJob(self.config, job)) + + return jobs def post_objects(self, database_obj): return self.runtime_api.post_objects(database_obj) @@ -232,31 +227,52 @@ def update_op(self, opid, done, meta): return self.runtime_api.update_op(opid, done=done, meta=meta) - class Watcher: - def __init__(self, site_configuration_file): + def __init__(self, site_configuration_file: Union[str, Path], state_file: Union[str, Path] = None): self._POLL = 20 self._MAX_FAILS = 2 self.should_skip_claim = False - self.config = Config(site_configuration_file) - self.file_handler = FileHandler(self.config) - self.api_handler = RuntimeApiHandler(self.config) - self.job_manager = JobManager(self.config, self.file_handler, self.api_handler) - self._ALLOWED = self.config.allowed_workflows + self.config = SiteConfig(site_configuration_file) + self.file_handler = FileHandler(self.config, state_file) + self.runtime_api_handler = RuntimeApiHandler(self.config) + self.job_manager = JobManager(self.config, self.file_handler) - def restore_from_checkpoint(self, nocheck: bool = False)-> None: + def restore_from_checkpoint(self, state_data: Dict[str, Any], nocheck: bool = False)-> None: """ Restore from checkpoint """ - state_data = self.file_handler.load_state_file() if state_data: - self.job_manager.restore_jobs(state_data, nocheck=nocheck) + self.file_handler.write_state(state_data) + self.job_manager.restore_from_state() + def cycle(self): self.restore_from_checkpoint() if not self.should_skip_claim: - self.claim_jobs() - self.job_manager.check_job_status() + unclaimed_jobs = self.runtime_api_handler.get_unclaimed_jobs(self.config.allowed_workflows) + self.claim_jobs(unclaimed_jobs) + + successful_jobs, failed_jobs = self.job_manager.get_finished_jobs() + for job in successful_jobs: + job_database = self.job_manager.process_successful_job(job) + job_dict = json.loads(job_database.json(exclude_unset=True)) + + # post workflow execution and data objects to the runtime api + resp = self.runtime_api_handler.post_objects(job_dict) + if not resp.ok: + logger.error(f"Error posting objects: {resp}") + continue + job.done = True + # update the operation record + resp = self.runtime_api_handler.update_op( + job.opid, done=True, meta=job.job.metadata + ) + if not resp.ok: + logger.error(f"Error updating operation: {resp}") + continue + + for job in failed_jobs: + self.job_manager.process_failed_job(job) def watch(self): logger.info("Entering polling loop") @@ -268,10 +284,12 @@ def watch(self): sleep(self._POLL) - def claim_jobs(self): - jobs = self.api_handler.list_jobs(self._ALLOWED) - for job in jobs: - claim = self.api_handler.claim_job(job["id"]) + def claim_jobs(self, unclaimed_jobs: List[WorkflowJob] = None): + # unclaimed_jobs = self.runtime_api_handler.get_unclaimed_jobs(self.config.allowed_workflows) + for job in unclaimed_jobs: + claim = self.runtime_api_handler.claim_job(job.workflow.nmdc_jobid) opid = claim["detail"]["id"] - self.job_manager.submit_job(job, opid) - self.file_handler.save_state_file(self.job_manager.job_checkpoint()) + new_job = self.job_manager.prepare_and_cache_new_job(job, opid) + if new_job: + new_job.job.submit_job() + self.file_handler.write_state(self.job_manager.job_checkpoint()) diff --git a/nmdc_automation/workflow_automation/wfutils.py b/nmdc_automation/workflow_automation/wfutils.py index 6f9dc25b..1b81cc4c 100755 --- a/nmdc_automation/workflow_automation/wfutils.py +++ b/nmdc_automation/workflow_automation/wfutils.py @@ -1,399 +1,348 @@ #!/usr/bin/env python +from abc import ABC, abstractmethod import os import json import tempfile -import requests -import nmdc_schema.nmdc as nmdc import logging -import datetime -import pytz +import re import hashlib -from linkml_runtime.dumpers import json_dumper - -# TODO: Berkley refactoring: -# The NmdcSchema class - responsible for creating workflow and data object records -# to be inserted into the NMDC database will need to be updated to generate Berkley-compatible -# Datageneration and WorkflowExecution records. - -# TODO: Rename this class to something more descriptive - it runs and monitors workflows running in Cromwell -# via the Cromwell REST API. -# Consider renaming to CromwellWorkflowRunner. -# Consider generalizing the class to be able to submit and monitor workflows to other workflow engines e.g. JAWS. -# TODO: Add type hints to all methods, add docstrings to all methods. -# TODO: Rename the package to something more descriptive - it is responsible for running and monitoring workflows. -class WorkflowJob: - DEFAULT_STATUS = "Unsubmitted" - SUCCESS_STATUS = "Succeeded" - METADATA_URL_SUFFIX = "/metadata" - LABEL_SUBMITTER_VALUE = "nmdcda" - LABEL_PARAMETERS = ["release", "wdl", "git_repo"] - CHUNK_SIZE = 1000000 # 1 MB - GIT_RELEASES_PATH = "/releases/download" - - debug = False - dryrun = False - options = None - activity_templ = None - outputs = None - input_data_objects = [] - start = None - end = None - - def __init__( - self, - site_config, - type=None, - workflow_config=None, - nmdc_jobid=None, - opid=None, - activity_id=None, - state=None, - nocheck=False, - ): - self.config = site_config - self.workflow_config = workflow_config - self.set_config_attributes() - if workflow_config: - self.load_workflow_config() - self.set_initial_state(state, activity_id, type, nmdc_jobid, opid) - if self.jobid and not nocheck: - self.check_status() - - def set_config_attributes(self): - # TODO: Why are we not using the config object directly? This is a code smell. - # Consider wrapping with @property decorators to make this more explicit. - self.cromwell_url = self.config.cromwell_url - self.data_dir = self.config.data_dir - self.resource = self.config.resource - self.url_root = self.config.url_root - - # TODO: These could be @property decorators - def load_workflow_config(self): - self.outputs = self.workflow_config.get("outputs") - self.activity_templ = self.workflow_config.get("activity") - self.input_data_objects = self.workflow_config.get("input_data_objects") - - def set_initial_state(self, state, activity_id, typ, nmdc_jobid, opid): - if state: - self.load_state_from_dict(state) - else: - self.set_default_state(activity_id, typ, nmdc_jobid, opid) - - def load_state_from_dict(self, state): - self.activity_id = state["activity_id"] - self.nmdc_jobid = state["nmdc_jobid"] - self.opid = state.get("opid", None) - self.type = state["type"] - self.workflow_config = state["conf"] - self.jobid = state["cromwell_jobid"] - self.last_status = state["last_status"] - self.failed_count = state.get("failed_count", 0) - self.done = state.get("done", None) - self.start = state.get("start") - self.end = state.get("end") - self.load_workflow_config() - - def set_default_state(self, activity_id, typ, nmdc_jobid, opid): - self.activity_id = activity_id - # TODO why? - self.type = typ - self.nmdc_jobid = nmdc_jobid - self.opid = opid - self.done = None - self.jobid = None - self.failed_count = 0 - self.last_status = self.DEFAULT_STATUS - - def get_state(self): - data = { - "type": self.type, - "cromwell_jobid": self.jobid, - "nmdc_jobid": self.nmdc_jobid, - "conf": self.workflow_config, - "activity_id": self.activity_id, - "last_status": self.last_status, - "done": self.done, - "failed_count": self.failed_count, - "start": self.start, - "end": self.end, - "opid": self.opid, - } - return data +from typing import Any, Dict, List, Optional, Union +from pathlib import Path +import shutil - def check_status(self): - """ - Check the status in Cromwell - """ - if not self.jobid: - self.last_status = "Unsubmitted" - return self.last_status +from nmdc_automation.config import SiteConfig +from nmdc_automation.workflow_automation.models import DataObject, workflow_process_factory + +DEFAULT_MAX_RETRIES = 2 + +class JobRunnerABC(ABC): - url = f"{self.cromwell_url}/{self.jobid}/status" + @abstractmethod + def submit_job(self) -> str: + pass - try: - resp = requests.get(url) - resp.raise_for_status() - except requests.exceptions.RequestException as ex: - # logging.error(f"Error checking status: {ex}") - self.last_status = "Error" - return self.last_status + @abstractmethod + def get_job_status(self) -> str: + pass - data = resp.json() - # TODO: Why not name this variable 'status'? - state = data.get("status", "Unknown") - self.last_status = state + @abstractmethod + def get_job_metadata(self) -> Dict[str, Any]: + pass - if state == "Succeeded" and not self.end: - self.end = datetime.datetime.now(pytz.utc).isoformat() + @property + @abstractmethod + def job_id(self) -> Optional[str]: + pass + + @property + @abstractmethod + def outputs(self) -> Dict[str, str]: + pass + + @property + @abstractmethod + def metadata(self) -> Dict[str, Any]: + pass + + @property + @abstractmethod + def max_retries(self) -> int: + pass + + + +class CromwellRunner(JobRunnerABC): + + def __init__(self, site_config: SiteConfig, workflow: "WorkflowStateManager", job_metadata: Dict[str, + Any] = None, max_retries: int = DEFAULT_MAX_RETRIES): + self.config = site_config + self.workflow = workflow + self.service_url = self.config.cromwell_url + self._metadata = {} + if job_metadata: + self._metadata = job_metadata + self._max_retries = max_retries + + + def submit_job(self) -> str: + # TODO: implement + pass + + def get_job_status(self) -> str: + # TODO: implement + return "Pending" + + def get_job_metadata(self) -> Dict[str, Any]: + raise NotImplementedError + # TODO: implement + + @property + def job_id(self) -> Optional[str]: + return self.metadata.get("id", None) + + @property + def outputs(self) -> Dict[str, str]: + return self.metadata.get("outputs", {}) + + @property + def metadata(self) -> Dict[str, Any]: + return self._metadata + + @metadata.setter + def metadata(self, metadata: Dict[str, Any]): + self._metadata = metadata + + @property + def max_retries(self) -> int: + return self._max_retries + + + +class WorkflowStateManager: + def __init__(self, state: Dict[str, Any] = None, opid: str = None): + if state is None: + state = {} + self.cached_state = state + if opid and "opid" in self.cached_state: + raise ValueError("opid already set in job state") + if opid: + self.cached_state["opid"] = opid + + + def update_state(self, state: Dict[str, Any]): + self.cached_state.update(state) + + @property + def state(self) -> Dict[str, Any]: + return self.cached_state + + @property + def config(self) -> Dict[str, Any]: + # for backward compatibility we need to check for both keys + return self.cached_state.get("conf", self.cached_state.get("config", {})) + + @property + def execution_template(self) -> Dict[str, str]: + # for backward compatibility we need to check for both keys + return self.config.get("workflow_execution", self.config.get("activity", {})) + + @property + def workflow_execution_id(self) -> Optional[str]: + # for backward compatibility we need to check for both keys + return self.config.get("activity_id", self.config.get("workflow_execution_id", None)) + + @property + def was_informed_by(self) -> Optional[str]: + return self.config.get("was_informed_by", None) + + @property + def workflow_execution_type(self) -> Optional[str]: + return self.execution_template.get("type", None) + + @property + def workflow_execution_name(self) -> Optional[str]: + name_base = self.execution_template.get("name", None) + if name_base: + return name_base.replace("{id}", self.workflow_execution_id) + return None + + @property + def data_outputs(self) -> List[Dict[str, str]]: + return self.config.get("outputs", []) + + @property + def input_prefix(self) -> Optional[str]: + return self.config.get("input_prefix", None) + + @property + def nmdc_jobid(self)-> Optional[str]: + # different keys in state file vs database record + return self.cached_state.get("nmdc_jobid", self.cached_state.get("id", None)) - return state + @property + def job_runner_id(self) -> Optional[str]: + # for now we only have cromwell as a job runner + job_runner_ids = ["cromwell_jobid", ] + for job_runner_id in job_runner_ids: + if job_runner_id in self.cached_state: + return self.cached_state[job_runner_id] - def get_metadata(self): + +class WorkflowJob: + def __init__(self, site_config: SiteConfig, workflow_state: Dict[str, Any] = None, + job_metadata: Dict['str', Any] = None, opid: str = None, job_runner: JobRunnerABC = None + )-> None: + self.site_config = site_config + self.workflow = WorkflowStateManager(workflow_state, opid) + # default to CromwellRunner if no job_runner is provided + if job_runner is None: + job_runner = CromwellRunner(site_config, self.workflow, job_metadata) + self.job = job_runner + + # Properties to access the site config, job state, and job runner attributes + # getter and setter props for job state opid + @property + def opid(self) -> str: + return self.workflow.state.get("opid", None) + + def set_opid(self, opid: str, force: bool = False): + if self.opid and not force: + raise ValueError("opid already set in job state") + self.workflow.update_state({"opid": opid}) + + @property + def done(self) -> Optional[bool]: + return self.workflow.state.get("done", None) + + @done.setter + def done(self, done: bool): + self.workflow.update_state({"done": done}) + + + @property + def job_status(self) -> str: + status = None + job_id_keys = ["cromwell_jobid"] + failed_count = self.workflow.state.get("failed_count", 0) + # if none of the job id keys are in the workflow state, it is unsubmitted + if not any(key in self.workflow.state for key in job_id_keys): + status = "Unsubmitted" + self.workflow.update_state({"last_status": status}) + elif self.workflow.state.get("last_status") == "Succeeded": + status = "Succeeded" + elif self.workflow.state.get("last_status") == "Failed" and failed_count >= self.job.max_retries: + status = "Failed" + else: + status = self.job.get_job_status() + self.workflow.update_state({"last_status": status}) + return status + + + @property + def workflow_execution_id(self) -> Optional[str]: + return self.workflow.workflow_execution_id + + @property + def cromwell_url(self) -> str: + return self.site_config.cromwell_url + + @property + def data_dir(self) -> str: + return self.site_config.data_dir + + @property + def execution_resource(self) -> str: + return self.site_config.resource + + @property + def url_root(self) -> str: + return self.site_config.url_root + + @property + def was_informed_by(self) -> str: + return self.workflow.was_informed_by + + @property + def as_workflow_execution_dict(self) -> Dict[str, Any]: + # for forward compatibility we need to strip Activity from the type + normalized_type = self.workflow.workflow_execution_type.replace("Activity", "") + base_dict = { + "id": self.workflow_execution_id, + "type": normalized_type, + "name": self.workflow.workflow_execution_name, + "git_url": self.workflow.config["git_repo"], + "execution_resource": self.execution_resource, + "was_informed_by": self.was_informed_by, + "has_input": [dobj["id"] for dobj in self.workflow.config["input_data_objects"]], + "started_at_time": self.workflow.state.get("start"), + "ended_at_time": self.workflow.state.get("end"), + "version": self.workflow.config["release"], + } + return base_dict + + def make_data_objects(self, output_dir: Union[str, Path] = None)-> List[DataObject]: """ - Check the status in Cromwell + Create DataObject objects for each output of the job. """ - if not self.jobid: - return self.DEFAULT_STATUS - url = f"{self.cromwell_url}/{self.jobid}{self.METADATA_URL_SUFFIX}" - resp = requests.get(url) - resp.raise_for_status() - return resp.json() - - def json_log(self, data, title="json_log"): - logging.debug(title) - logging.debug(json.dumps(data, indent=2)) - - def _generate_inputs(self): - inputs = {} - prefix = self.workflow_config["input_prefix"] - for input, input_object in self.workflow_config["inputs"].items(): - input_prefix = f"{prefix}.{input}" - if input_object == "{resource}": - input_object = self.config.resource - inputs[input_prefix] = input_object - return inputs - - def _generate_labels(self): - labels = self.get_label_parameters() - labels["pipeline_version"] = labels["release"] - labels["pipeline"] = labels["wdl"] - labels["activity_id"] = self.activity_id - labels["opid"] = self.opid - labels["submitter"] = self.LABEL_SUBMITTER_VALUE - return labels - - def get_label_parameters(self): - return {param: self.workflow_config[param] for param in self.LABEL_PARAMETERS} - - def fetch_release_file(self, fn, suffix=None): - release = self.workflow_config["release"] - base_url = self.workflow_config["git_repo"].rstrip("/") - url = base_url + f"{self.GIT_RELEASES_PATH}/{release}/{fn}" - - logging.debug(f"BASE URL: {base_url}") - logging.debug(f"URL: {url}") - - resp = requests.get(url, stream=True) - resp.raise_for_status() - - fp, fname = tempfile.mkstemp(suffix=suffix) - try: - with os.fdopen(fp, "wb") as fd: - for chunk in resp.iter_content(chunk_size=self.CHUNK_SIZE): - fd.write(chunk) - except Exception as ex: - os.unlink(fname) - raise ex - - return fname - - def generate_files(self, conf): - wdl_file = self.fetch_release_file(conf["wdl"], suffix=".wdl") - bundle_file = self.fetch_release_file("bundle.zip", suffix=".zip") - files = { - "workflowSource": open(wdl_file), - "workflowDependencies": open(bundle_file, "rb"), - "workflowInputs": open(_json_tmp(self._generate_inputs())), - "labels": open(_json_tmp(self._generate_labels())), - } - if self.options: - files["workflowOptions"] = open(self.options) - return files - - def cromwell_submit(self, force=False): - # Refresh the log - status = self.check_status() - states = ["Failed", "Aborted", "Aborting", "Unsubmitted"] - if not force and status not in states: - logging.info("Skipping: %s %s" % (self.activity_id, status)) - return - - cleanup = [] - conf = self.workflow_config - try: - self.json_log(self._generate_inputs(), title="Inputs") - self.json_log(self._generate_labels(), title="Labels") - files = self.generate_files(conf) - cleanup.extend(files.values()) - - job_id = "unknown" - if not self.dryrun: - logging.debug(self.cromwell_url) - resp = requests.post(self.cromwell_url, data={}, files=files) - resp.raise_for_status() - data = resp.json() - self.json_log(data, title="Response") - job_id = data["id"] + + data_objects = [] + + for output_spec in self.workflow.data_outputs: # specs are defined in the workflow.yaml file under Outputs + output_key = f"{self.workflow.input_prefix}.{output_spec['output']}" + if output_key not in self.job.outputs: + if output_spec.get("optional"): + logging.debug(f"Optional output {output_key} not found in job outputs") + continue + else: + logging.warning(f"Required output {output_key} not found in job outputs") + continue + # get the full path to the output file from the job_runner + output_file_path = Path(self.job.outputs[output_key]) + + + md5_sum = _md5(output_file_path) + file_url = f"{self.url_root}/{self.was_informed_by}/{self.workflow_execution_id}/{output_file_path.name}" + + # copy the file to the output directory if provided + new_output_file_path = None + if output_dir: + new_output_file_path = Path(output_dir) / output_file_path.name + # copy the file to the output directory + shutil.copy(output_file_path, new_output_file_path) else: - job_id = "dryrun" - - logging.info(f"Submitted: {job_id}") - self.start = datetime.datetime.now(pytz.utc).isoformat() - self.jobid = job_id - self.done = False - finally: - for file in cleanup: - file.close() - os.unlink(file.name) - -# TODO: Rename this class to something descriptive - it is responsible for creating NMDC database objects - -# the existing name is already taken by the NMDC schema module. -# Consider renaming to NMDCDatabaseObjectCreator. -# Add type hints to all methods, add docstrings to all methods. -class NmdcSchema: - def __init__(self): - self.nmdc_db = nmdc.Database() - self._data_object_string = "nmdc:DataObject" - self.activity_store = self.activity_map() - - def make_data_object( - self, - name: str, - full_file_name: str, - file_url: str, - data_object_type: str, - dobj_id: str, - md5_sum: str, - description: str, - omics_id: str, - ) -> None: - """Create nmdc database data object - - Args: - name (str): name of data object - full_file_name (str): full file name - file_url (str): url for data object file - data_object_type (str): nmdc data object type - dobj_id (str): minted data object id - md5_sum (str): md5 check sum of data product - description (str): description for data object - omics_id (str): minted omics id - """ + logging.warning(f"Output directory not provided, not copying {output_file_path} to output directory") - self.nmdc_db.data_object_set.append( - nmdc.DataObject( - file_size_bytes=os.stat(full_file_name).st_size, - name=name, + # create a DataObject object + data_object = DataObject( + id = output_spec["id"], + name=output_file_path.name, + type="nmdc:DataObject", url=file_url, - data_object_type=data_object_type, - type=self._data_object_string, - id=dobj_id, + data_object_type=output_spec["data_object_type"], md5_checksum=md5_sum, - description=description.replace("{id}", omics_id), + description=output_spec["description"], + was_generated_by=self.workflow_execution_id, ) - ) - - def create_activity_record( - self, - activity_record, - activity_name, - workflow, - activity_id, - resource, - has_inputs_list, - has_output_list, - omic_id, - start_time, - end_time, - ): - database_activity_set = self.activity_store[activity_record][0] - - database_activity_range = self.activity_store[activity_record][1] - - database_activity_set.append( - database_activity_range( - id=activity_id, # call minter for activity type - name=activity_name, - git_url=workflow["git_repo"], - version=workflow["release"], - execution_resource=resource, - started_at_time=start_time, - has_input=has_inputs_list, - has_output=has_output_list, - type=activity_record, - ended_at_time=end_time, - was_informed_by=omic_id, - ) - ) - def activity_map(self): - """ - Inform Object Mapping Process what activies need to be imported and - distrubuted across the process - """ + data_objects.append(data_object) + return data_objects - activity_store_dict = { - #TODO deprecate MetagenomeSequencing - "nmdc:MetagenomeSequencing": ( - self.nmdc_db.workflow_execution_set, - nmdc.MetagenomeSequencing, - ), - "nmdc:ReadQcAnalysis": ( - self.nmdc_db.workflow_execution_set, - nmdc.ReadQcAnalysis, - ), - "nmdc:ReadBasedTaxonomyAnalysis": ( - self.nmdc_db.workflow_execution_set, - nmdc.ReadBasedTaxonomyAnalysis, - ), - "nmdc:MetagenomeAssembly": ( - self.nmdc_db.workflow_execution_set, - nmdc.MetagenomeAssembly, - ), - "nmdc:MetatranscriptomeAssembly": ( - self.nmdc_db.workflow_execution_set, - nmdc.MetatranscriptomeAssembly, - ), - "nmdc:MetagenomeAnnotation": ( - self.nmdc_db.workflow_execution_set, - nmdc.MetagenomeAnnotation, - ), - "nmdc:MetatranscriptomeAnnotation": ( - self.nmdc_db.workflow_execution_set, - nmdc.MetatranscriptomeAnnotation, - ), - "nmdc:MagsAnalysis": ( - self.nmdc_db.workflow_execution_set, - nmdc.MagsAnalysis, - ), - "nmdc:MetatranscriptomeExpressionAnalysis": ( - self.nmdc_db.workflow_execution_set, - nmdc.MetatranscriptomeExpressionAnalysis, - ), - } - - return activity_store_dict - - def get_database_object_dump(self): + def make_workflow_execution_record(self, data_objects: List[DataObject]) -> Dict[str, Any]: """ - Get the NMDC database object. - - Returns: - nmdc.Database: NMDC database object. + Create a workflow execution record for the job """ - nmdc_database_object = json_dumper.dumps(self.nmdc_db, inject_type=False) - return nmdc_database_object + wf_dict = self.as_workflow_execution_dict + wf_dict["has_output"] = [dobj.id for dobj in data_objects] + + # workflow-specific keys + logical_names = set() + field_names = set() + pattern = r'\{outputs\.(\w+)\.(\w+)\}' + for attr_key, attr_val in self.workflow.execution_template.items(): + if attr_val.startswith("{outputs."): + match = re.match(pattern, attr_val) + if not match: + logging.warning(f"Invalid output reference {attr_val}") + continue + logical_names.add(match.group(1)) + field_names.add(match.group(2)) + + for logical_name in logical_names: + output_key = f"{self.workflow.input_prefix}.{logical_name}" + data_path = self.job.outputs.get(output_key) + if data_path: + # read in as json + with open(data_path) as f: + data = json.load(f) + for field_name in field_names: + # add to wf_dict if it has a value + if field_name in data: + wf_dict[field_name] = data[field_name] + else: + logging.warning(f"Field {field_name} not found in {data_path}") + + return wf_dict def _json_tmp(data): diff --git a/nmdc_automation/workflow_automation/workflows.py b/nmdc_automation/workflow_automation/workflows.py index df586fea..acefd44b 100644 --- a/nmdc_automation/workflow_automation/workflows.py +++ b/nmdc_automation/workflow_automation/workflows.py @@ -1,14 +1,13 @@ """ This module reads the workflows yaml file and returns a list of WorkflowConfig objects""" from yaml import load - try: from yaml import CLoader as Loader except ImportError: from yaml import Loader -import sys from nmdc_automation.workflow_automation.models import WorkflowConfig + def load_workflow_configs(yaml_file) -> list[WorkflowConfig]: """ Read the workflows yaml file and return a list of WorkflowConfig objects @@ -28,8 +27,3 @@ def load_workflow_configs(yaml_file) -> list[WorkflowConfig]: wf.add_child(wf2) wf2.add_parent(wf) return workflow_configs - - -if __name__ == "__main__": - wff = sys.argv[1] - load_workflow_configs(wff) diff --git a/tests/conftest.py b/tests/conftest.py index e14ef327..ac83981a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,15 +3,35 @@ from pymongo import MongoClient from pathlib import Path from pytest import fixture +import shutil from time import time -from yaml import load +from unittest.mock import Mock +from yaml import load, Loader -from nmdc_automation.config import Config +from nmdc_automation.config import SiteConfig +from nmdc_automation.workflow_automation.models import WorkflowConfig +from tests.fixtures import db_utils +from nmdc_automation.workflow_automation.wfutils import WorkflowJob +@fixture(scope="session") +def mock_job_state(): + state = db_utils.read_json( + "mags_workflow_state.json" + ) + return state -@fixture +@fixture(scope="session") +def mags_config(fixtures_dir)->WorkflowConfig: + yaml_file = fixtures_dir / "mags_config.yaml" + wf = load(open(yaml_file), Loader) + # normalize the keys from Key Name to key_name + wf = {k.replace(" ", "_").lower(): v for k, v in wf.items()} + return WorkflowConfig(**wf) + + +@fixture(scope="session") def test_db(): conn_str = os.environ.get("MONGO_URL", "mongodb://localhost:27017") return MongoClient(conn_str).test @@ -48,7 +68,9 @@ def base_test_dir(): @fixture(scope="session") def fixtures_dir(base_test_dir): - return base_test_dir / "fixtures" + path = base_test_dir / "fixtures" + # get the absolute path + return path.resolve() @fixture(scope="session") def test_data_dir(base_test_dir): @@ -58,15 +80,19 @@ def test_data_dir(base_test_dir): def workflows_config_dir(base_test_dir): return base_test_dir.parent / "nmdc_automation/config/workflows" -@fixture(scope="session") -def import_config_dir(base_test_dir): - return base_test_dir.parent / "configs" - @fixture(scope="session") -def site_config(base_test_dir): +def site_config_file(base_test_dir): return base_test_dir / "site_configuration_test.toml" @fixture(scope="session") -def job_config(site_config): - return Config(site_config) +def site_config(site_config_file): + return SiteConfig(site_config_file) + +@fixture +def initial_state_file(fixtures_dir, tmp_path): + state_file = fixtures_dir / "initial_state.json" + # make a working copy in tmp_path + copied_state_file = tmp_path / "initial_state.json" + shutil.copy(state_file, copied_state_file) + return copied_state_file diff --git a/tests/fixtures/cromwell_metadata.json b/tests/fixtures/cromwell_metadata.json new file mode 100644 index 00000000..2a6f1171 --- /dev/null +++ b/tests/fixtures/cromwell_metadata.json @@ -0,0 +1,78 @@ +{ + "id": "34b41f4a-fe50-4c00-bb60-444104b4c024", + "status": "Succeeded", + "start": "2023-09-01T10:00:00.000Z", + "end": "2023-09-01T12:00:00.000Z", + "workflowName": "example_workflow", + "submittedFiles": { + "workflow": "/path/to/workflow.wdl", + "inputs": { + "example_workflow.input_file": "/path/to/input.txt" + }, + "options": "/path/to/options.json", + "labels": { + "project": "example_project", + "version": "1.0" + } + }, + "inputs": { + "example_workflow.input_file": "/path/to/input.txt" + }, + "outputs": { + "nmdc_mags.final_checkm": "./outputs/final_checkm.json" + }, + "calls": { + "example_workflow.task1": [ + { + "shardIndex": -1, + "attempt": 1, + "executionStatus": "Done", + "start": "2023-09-01T10:10:00.000Z", + "end": "2023-09-01T10:20:00.000Z", + "stdout": "/path/to/stdout", + "stderr": "/path/to/stderr", + "backendLogs": { + "log": "/path/to/backend_log" + }, + "outputs": { + "task1_output": "/path/to/task1_output.txt" + }, + "backendStatus": "Done", + "runtimeAttributes": { + "cpu": "1", + "memory": "2GB", + "docker": "example_docker_image" + } + } + ], + "example_workflow.task2": [ + { + "shardIndex": -1, + "attempt": 1, + "executionStatus": "Done", + "start": "2023-09-01T10:30:00.000Z", + "end": "2023-09-01T10:40:00.000Z", + "stdout": "/path/to/stdout", + "stderr": "/path/to/stderr", + "backendLogs": { + "log": "/path/to/backend_log" + }, + "outputs": { + "task2_output": "/path/to/task2_output.txt" + }, + "backendStatus": "Done", + "runtimeAttributes": { + "cpu": "2", + "memory": "4GB", + "docker": "example_docker_image" + } + } + ] + }, + "workflowRoot": "/path/to/workflow/root", + "labels": { + "project": "example_project", + "version": "1.0" + }, + "submission": "2023-09-01T09:50:00.000Z" +} diff --git a/tests/fixtures/failed_job_state.json b/tests/fixtures/failed_job_state.json new file mode 100644 index 00000000..64f84cca --- /dev/null +++ b/tests/fixtures/failed_job_state.json @@ -0,0 +1,216 @@ +{ + "type": "MAGs: v1.3.10", + "cromwell_jobid": "9492a397-eb30-472b-9d3b-abc123456789", + "nmdc_jobid": "nmdc:66cf64b6-7462-11ef-8b84-abc123456789", + "conf": { + "git_repo": "https://github.com/microbiomedata/metaMAGs", + "release": "v1.3.10", + "wdl": "mbin_nmdc.wdl", + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "activity_set": "mags_activity_set", + "was_informed_by": "nmdc:omprc-11-9cdxha98", + "trigger_activity": "nmdc:wfmgan-11-jv8kx789.1", + "iteration": 1, + "input_prefix": "nmdc_mags", + "inputs": { + "proj": "nmdc:wfmag-11-g7msr323.1", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "crispr_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "map_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-1x850k20", + "name": "nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "description": "Assembly contigs (remapped) for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "md5_checksum": "6debed079383eeca2045ce23b0576607", + "file_size_bytes": 2084209623, + "data_object_type": "Assembly Contigs" + }, + { + "id": "nmdc:dobj-11-fkj2kt47", + "name": "nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-9cdxha98", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "md5_checksum": "88ec004bd037a3820060427098798666", + "file_size_bytes": 15704979428, + "data_object_type": "Assembly Coverage BAM" + }, + { + "id": "nmdc:dobj-11-f9rnav80", + "name": "nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "description": "Functional Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "md5_checksum": "349cae9b4fe62bb910f08a183e57b475", + "file_size_bytes": 1320869282, + "data_object_type": "Functional Annotation GFF" + }, + { + "id": "nmdc:dobj-11-btqzf393", + "name": "nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "description": "FASTA Amino Acid File for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "md5_checksum": "292eae73923605dae2ef9f5d582e4603", + "file_size_bytes": 1075716574, + "data_object_type": "Annotation Amino Acid FASTA" + }, + { + "id": "nmdc:dobj-11-hdty3m42", + "name": "nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "description": "COGs for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "md5_checksum": "c4d1121c1ceb1229afb7190d23553003", + "file_size_bytes": 712459544, + "data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF" + }, + { + "id": "nmdc:dobj-11-0gk70187", + "name": "nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "description": "EC Annotations for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "md5_checksum": "84cf22f39532e1bd001bea8425735a82", + "file_size_bytes": 116429630, + "data_object_type": "Annotation Enzyme Commission" + }, + { + "id": "nmdc:dobj-11-3mtmhf26", + "name": "nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "description": "KEGG Orthology for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "md5_checksum": "17d699df17c97fc28796a198cf40a328", + "file_size_bytes": 169182276, + "data_object_type": "Annotation KEGG Orthology" + }, + { + "id": "nmdc:dobj-11-7kfhf682", + "name": "nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "description": "Pfam Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "md5_checksum": "23c33758dc138e1af0f39fa1f3ca07db", + "file_size_bytes": 602929841, + "data_object_type": "Pfam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-9hjg8y84", + "name": "nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "description": "TIGRFam for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "md5_checksum": "bbfded219e0b359602725c9efb4f0c54", + "file_size_bytes": 61788991, + "data_object_type": "TIGRFam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-2x0wy902", + "name": "nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "description": "Crispr Terms for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "md5_checksum": "9d2255a63e39552328c4da20ccf2bb3f", + "file_size_bytes": 142989, + "data_object_type": "Crispr Terms" + }, + { + "id": "nmdc:dobj-11-r0bx4g71", + "name": "nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "description": "Product names for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "md5_checksum": "6f1325b2f8dee9b2a75598fb9645c43d", + "file_size_bytes": 401118634, + "data_object_type": "Product Names" + }, + { + "id": "nmdc:dobj-11-7mj15p44", + "name": "nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "description": "Gene Phylogeny for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "md5_checksum": "037aee803f1b81ac5ac1bccb9a18527d", + "file_size_bytes": 748420652, + "data_object_type": "Gene Phylogeny tsv" + }, + { + "id": "nmdc:dobj-11-r2zqpy26", + "name": "nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "description": "Scaffold Lineage tsv for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "md5_checksum": "efdce9771cdda8bd8548e44ef6d1d3a3", + "file_size_bytes": 503898615, + "data_object_type": "Scaffold Lineage tsv" + }, + { + "id": "nmdc:dobj-11-4k2bt072", + "name": "nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "description": "Contig mappings file for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "md5_checksum": "1056a6ef48ce9124de0828ee85246e65", + "file_size_bytes": 250129248, + "data_object_type": "Contig Mapping File" + } + ], + "activity": { + "name": "Metagenome Assembled Genomes Analysis Activity for {id}", + "type": "nmdc:MagsAnalysisActivity", + "binned_contig_num": "{outputs.final_stats_json.binned_contig_num}", + "input_contig_num": "{outputs.final_stats_json.input_contig_num}", + "low_depth_contig_num": "{outputs.final_stats_json.low_depth_contig_num}", + "mags_list": "{outputs.final_stats_json.mags_list}", + "too_short_contig_num": "{outputs.final_stats_json.too_short_contig_num}", + "unbinned_contig_num": "{outputs.final_stats_json.unbinned_contig_num}" + }, + "outputs": [ + { + "output": "final_checkm", + "data_object_type": "CheckM Statistics", + "description": "CheckM for {id}", + "name": "CheckM statistics report", + "id": "nmdc:dobj-11-xvjz5h55" + }, + { + "output": "final_hqmq_bins_zip", + "data_object_type": "Metagenome Bins", + "description": "Metagenome Bins for {id}", + "name": "Metagenome bin tarfiles archive", + "id": "nmdc:dobj-11-85q1v678" + }, + { + "output": "final_gtdbtk_bac_summary", + "data_object_type": "GTDBTK Bacterial Summary", + "description": "Bacterial Summary for {id}", + "name": "GTDBTK bacterial summary", + "id": "nmdc:dobj-11-j5p58211" + }, + { + "output": "final_gtdbtk_ar_summary", + "data_object_type": "GTDBTK Archaeal Summary", + "description": "Archaeal Summary for {id}", + "name": "GTDBTK archaeal summary", + "suffix": "_gtdbtk.ar122.summary.tsv", + "id": "nmdc:dobj-11-ec2fqk35" + }, + { + "output": "mags_version", + "data_object_type": "Metagenome Bins Info File", + "description": "Metagenome Bins Info File for {id}", + "name": "Metagenome Bins Info File", + "id": "nmdc:dobj-11-kg68h909" + } + ] + }, + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "last_status": "Failed", + "done": true, + "failed_count": 2, + "start": "2024-09-16T19:33:32.562412+00:00", + "end": "2024-09-16T21:52:12.873101+00:00", + "opid": "nmdc:wfmag-11-g7msr323.1" + } \ No newline at end of file diff --git a/tests/fixtures/initial_state.json b/tests/fixtures/initial_state.json new file mode 100644 index 00000000..aa7dfc62 --- /dev/null +++ b/tests/fixtures/initial_state.json @@ -0,0 +1,220 @@ +{ + "jobs": [ + { + "type": "MAGs: v1.3.10", + "cromwell_jobid": "9492a397-eb30-472b-9d3b-abc123456789", + "nmdc_jobid": "nmdc:66cf64b6-7462-11ef-8b84-abc123456789", + "conf": { + "git_repo": "https://github.com/microbiomedata/metaMAGs", + "release": "v1.3.10", + "wdl": "mbin_nmdc.wdl", + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "activity_set": "mags_activity_set", + "was_informed_by": "nmdc:omprc-11-9cdxha98", + "trigger_activity": "nmdc:wfmgan-11-jv8kx789.1", + "iteration": 1, + "input_prefix": "nmdc_mags", + "inputs": { + "proj": "nmdc:wfmag-11-g7msr323.1", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "crispr_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "map_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-1x850k20", + "name": "nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "description": "Assembly contigs (remapped) for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "md5_checksum": "6debed079383eeca2045ce23b0576607", + "file_size_bytes": 2084209623, + "data_object_type": "Assembly Contigs" + }, + { + "id": "nmdc:dobj-11-fkj2kt47", + "name": "nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-9cdxha98", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "md5_checksum": "88ec004bd037a3820060427098798666", + "file_size_bytes": 15704979428, + "data_object_type": "Assembly Coverage BAM" + }, + { + "id": "nmdc:dobj-11-f9rnav80", + "name": "nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "description": "Functional Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "md5_checksum": "349cae9b4fe62bb910f08a183e57b475", + "file_size_bytes": 1320869282, + "data_object_type": "Functional Annotation GFF" + }, + { + "id": "nmdc:dobj-11-btqzf393", + "name": "nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "description": "FASTA Amino Acid File for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "md5_checksum": "292eae73923605dae2ef9f5d582e4603", + "file_size_bytes": 1075716574, + "data_object_type": "Annotation Amino Acid FASTA" + }, + { + "id": "nmdc:dobj-11-hdty3m42", + "name": "nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "description": "COGs for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "md5_checksum": "c4d1121c1ceb1229afb7190d23553003", + "file_size_bytes": 712459544, + "data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF" + }, + { + "id": "nmdc:dobj-11-0gk70187", + "name": "nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "description": "EC Annotations for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "md5_checksum": "84cf22f39532e1bd001bea8425735a82", + "file_size_bytes": 116429630, + "data_object_type": "Annotation Enzyme Commission" + }, + { + "id": "nmdc:dobj-11-3mtmhf26", + "name": "nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "description": "KEGG Orthology for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "md5_checksum": "17d699df17c97fc28796a198cf40a328", + "file_size_bytes": 169182276, + "data_object_type": "Annotation KEGG Orthology" + }, + { + "id": "nmdc:dobj-11-7kfhf682", + "name": "nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "description": "Pfam Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "md5_checksum": "23c33758dc138e1af0f39fa1f3ca07db", + "file_size_bytes": 602929841, + "data_object_type": "Pfam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-9hjg8y84", + "name": "nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "description": "TIGRFam for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "md5_checksum": "bbfded219e0b359602725c9efb4f0c54", + "file_size_bytes": 61788991, + "data_object_type": "TIGRFam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-2x0wy902", + "name": "nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "description": "Crispr Terms for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "md5_checksum": "9d2255a63e39552328c4da20ccf2bb3f", + "file_size_bytes": 142989, + "data_object_type": "Crispr Terms" + }, + { + "id": "nmdc:dobj-11-r0bx4g71", + "name": "nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "description": "Product names for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "md5_checksum": "6f1325b2f8dee9b2a75598fb9645c43d", + "file_size_bytes": 401118634, + "data_object_type": "Product Names" + }, + { + "id": "nmdc:dobj-11-7mj15p44", + "name": "nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "description": "Gene Phylogeny for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "md5_checksum": "037aee803f1b81ac5ac1bccb9a18527d", + "file_size_bytes": 748420652, + "data_object_type": "Gene Phylogeny tsv" + }, + { + "id": "nmdc:dobj-11-r2zqpy26", + "name": "nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "description": "Scaffold Lineage tsv for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "md5_checksum": "efdce9771cdda8bd8548e44ef6d1d3a3", + "file_size_bytes": 503898615, + "data_object_type": "Scaffold Lineage tsv" + }, + { + "id": "nmdc:dobj-11-4k2bt072", + "name": "nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "description": "Contig mappings file for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "md5_checksum": "1056a6ef48ce9124de0828ee85246e65", + "file_size_bytes": 250129248, + "data_object_type": "Contig Mapping File" + } + ], + "activity": { + "name": "Metagenome Assembled Genomes Analysis Activity for {id}", + "type": "nmdc:MagsAnalysisActivity", + "binned_contig_num": "{outputs.final_stats_json.binned_contig_num}", + "input_contig_num": "{outputs.final_stats_json.input_contig_num}", + "low_depth_contig_num": "{outputs.final_stats_json.low_depth_contig_num}", + "mags_list": "{outputs.final_stats_json.mags_list}", + "too_short_contig_num": "{outputs.final_stats_json.too_short_contig_num}", + "unbinned_contig_num": "{outputs.final_stats_json.unbinned_contig_num}" + }, + "outputs": [ + { + "output": "final_checkm", + "data_object_type": "CheckM Statistics", + "description": "CheckM for {id}", + "name": "CheckM statistics report", + "id": "nmdc:dobj-11-xvjz5h55" + }, + { + "output": "final_hqmq_bins_zip", + "data_object_type": "Metagenome Bins", + "description": "Metagenome Bins for {id}", + "name": "Metagenome bin tarfiles archive", + "id": "nmdc:dobj-11-85q1v678" + }, + { + "output": "final_gtdbtk_bac_summary", + "data_object_type": "GTDBTK Bacterial Summary", + "description": "Bacterial Summary for {id}", + "name": "GTDBTK bacterial summary", + "id": "nmdc:dobj-11-j5p58211" + }, + { + "output": "final_gtdbtk_ar_summary", + "data_object_type": "GTDBTK Archaeal Summary", + "description": "Archaeal Summary for {id}", + "name": "GTDBTK archaeal summary", + "suffix": "_gtdbtk.ar122.summary.tsv", + "id": "nmdc:dobj-11-ec2fqk35" + }, + { + "output": "mags_version", + "data_object_type": "Metagenome Bins Info File", + "description": "Metagenome Bins Info File for {id}", + "name": "Metagenome Bins Info File", + "id": "nmdc:dobj-11-kg68h909" + } + ] + }, + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "last_status": "Failed", + "done": false, + "failed_count": 1, + "start": "2024-09-16T19:33:32.562412+00:00", + "end": "2024-09-16T21:52:12.873101+00:00", + "opid": "nmdc:test-opid" + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/jobs_api_response.json b/tests/fixtures/jobs_api_response.json new file mode 100644 index 00000000..b2d2dab9 --- /dev/null +++ b/tests/fixtures/jobs_api_response.json @@ -0,0 +1,579 @@ +{ + "resources": [ + { + "workflow": { + "id": "Metagenome Annotation: v1.1.0" + }, + "id": "nmdc:0003398c-48a8-11ef-bcec-52b18d4509d1", + "created_at": "2024-07-23T03:59:46", + "config": { + "git_repo": "https://github.com/microbiomedata/mg_annotation", + "release": "v1.1.0", + "wdl": "annotation_full.wdl", + "activity_id": "nmdc:wfmgan-11-fp07wg93.2", + "activity_set": "metagenome_annotation_activity_set", + "was_informed_by": "nmdc:omprc-11-dfzknb42", + "trigger_activity": "nmdc:wfmgas-11-tekfqa46.1", + "iteration": 2, + "input_prefix": "annotation", + "inputs": { + "input_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-dfzknb42/nmdc:wfmgas-11-tekfqa46.1/nmdc_wfmgas-11-tekfqa46.1_contigs.fna", + "imgap_project_id": "scaffold", + "proj": "nmdc:wfmgan-11-fp07wg93.2" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-g7y3gy61", + "name": "Final assembly contigs fasta", + "description": "Assembly contigs for nmdc:wfmgas-11-tekfqa46.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dfzknb42/nmdc:wfmgas-11-tekfqa46.1/nmdc_wfmgas-11-tekfqa46.1_contigs.fna", + "md5_checksum": "89b0300d904b16e46e9d749cf633a911", + "file_size_bytes": 714986508, + "data_object_type": "Assembly Contigs" + } + ], + "activity": { + "name": "Metagenome Annotation Analysis Activity for {id}", + "type": "nmdc:MetagenomeAnnotationActivity" + }, + "outputs": [ + { + "output": "proteins_faa", + "data_object_type": "Annotation Amino Acid FASTA", + "description": "FASTA Amino Acid File for {id}", + "name": "FASTA amino acid file for annotated proteins", + "id": "nmdc:dobj-11-4akzce87" + }, + { + "output": "structural_gff", + "data_object_type": "Structural Annotation GFF", + "description": "Structural Annotation for {id}", + "name": "GFF3 format file with structural annotations", + "id": "nmdc:dobj-11-ek6tfd02" + }, + { + "output": "functional_gff", + "data_object_type": "Functional Annotation GFF", + "description": "Functional Annotation for {id}", + "name": "GFF3 format file with functional annotations", + "id": "nmdc:dobj-11-bm1qkk71" + }, + { + "output": "ko_tsv", + "data_object_type": "Annotation KEGG Orthology", + "description": "KEGG Orthology for {id}", + "name": "Tab delimited file for KO annotation", + "id": "nmdc:dobj-11-8h1tb446" + }, + { + "output": "ec_tsv", + "data_object_type": "Annotation Enzyme Commission", + "description": "EC Annotations for {id}", + "name": "Tab delimited file for EC annotation", + "suffix": "_ec.tsv", + "id": "nmdc:dobj-11-b7y5vv18" + }, + { + "output": "lineage_tsv", + "data_object_type": "Scaffold Lineage tsv", + "description": "Scaffold Lineage tsv for {id}", + "name": "Phylogeny at the scaffold level", + "suffix": "_scaffold_lineage.tsv", + "id": "nmdc:dobj-11-fn3ba237" + }, + { + "output": "cog_gff", + "data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF", + "description": "COGs for {id}", + "name": "GFF3 format file with COGs", + "id": "nmdc:dobj-11-6q7fh110" + }, + { + "output": "pfam_gff", + "data_object_type": "Pfam Annotation GFF", + "description": "Pfam Annotation for {id}", + "name": "GFF3 format file with Pfam", + "id": "nmdc:dobj-11-d85sqw71" + }, + { + "output": "tigrfam_gff", + "data_object_type": "TIGRFam Annotation GFF", + "description": "TIGRFam for {id}", + "name": "GFF3 format file with TIGRfam", + "id": "nmdc:dobj-11-fjq3q643" + }, + { + "output": "smart_gff", + "data_object_type": "SMART Annotation GFF", + "description": "SMART Annotations for {id}", + "name": "GFF3 format file with SMART", + "id": "nmdc:dobj-11-dhfdb719" + }, + { + "output": "supfam_gff", + "data_object_type": "SUPERFam Annotation GFF", + "description": "SUPERFam Annotations for {id}", + "name": "GFF3 format file with SUPERFam", + "id": "nmdc:dobj-11-qfmw4879" + }, + { + "output": "cath_funfam_gff", + "data_object_type": "CATH FunFams (Functional Families) Annotation GFF", + "description": "CATH FunFams for {id}", + "name": "GFF3 format file with CATH FunFams", + "id": "nmdc:dobj-11-ssa39z59" + }, + { + "output": "crt_gff", + "data_object_type": "CRT Annotation GFF", + "description": "CRT Annotations for {id}", + "name": "GFF3 format file with CRT", + "id": "nmdc:dobj-11-4hsvsv34" + }, + { + "output": "genemark_gff", + "data_object_type": "Genemark Annotation GFF", + "description": "Genemark Annotations for {id}", + "name": "GFF3 format file with Genemark", + "id": "nmdc:dobj-11-saaeyc80" + }, + { + "output": "prodigal_gff", + "data_object_type": "Prodigal Annotation GFF", + "description": "Prodigal Annotations {id}", + "name": "GFF3 format file with Prodigal", + "id": "nmdc:dobj-11-2kastj13" + }, + { + "output": "trna_gff", + "data_object_type": "TRNA Annotation GFF", + "description": "TRNA Annotations {id}", + "name": "GFF3 format file with TRNA", + "id": "nmdc:dobj-11-240tpb83" + }, + { + "output": "final_rfam_gff", + "data_object_type": "RFAM Annotation GFF", + "description": "RFAM Annotations for {id}", + "name": "GFF3 format file with RFAM", + "id": "nmdc:dobj-11-vn35b118" + }, + { + "output": "ko_ec_gff", + "data_object_type": "KO_EC Annotation GFF", + "description": "KO_EC Annotations for {id}", + "name": "GFF3 format file with KO_EC", + "id": "nmdc:dobj-11-20eag911" + }, + { + "output": "product_names_tsv", + "data_object_type": "Product Names", + "description": "Product names for {id}", + "name": "Product names file", + "id": "nmdc:dobj-11-bnjjvh49" + }, + { + "output": "gene_phylogeny_tsv", + "data_object_type": "Gene Phylogeny tsv", + "description": "Gene Phylogeny for {id}", + "name": "Gene Phylogeny file", + "id": "nmdc:dobj-11-6hgrts53" + }, + { + "output": "crt_crisprs", + "data_object_type": "Crispr Terms", + "description": "Crispr Terms for {id}", + "name": "Crispr Terms", + "id": "nmdc:dobj-11-mghmgg98" + }, + { + "output": "stats_tsv", + "data_object_type": "Annotation Statistics", + "description": "Annotation Stats for {id}", + "name": "Annotation statistics report", + "id": "nmdc:dobj-11-x2bvpj13" + }, + { + "output": "renamed_fasta", + "name": "Renamed assembly contigs fasta", + "data_object_type": "Assembly Contigs", + "description": "Assembly contigs (remapped) for {id}", + "id": "nmdc:dobj-11-hwd2k772" + }, + { + "output": "map_file", + "data_object_type": "Contig Mapping File", + "description": "Contig mappings file for {id}", + "name": "Contig mappings between contigs and scaffolds", + "suffix": "_contig_names_mapping.tsv", + "optional": true, + "id": "nmdc:dobj-11-6r54q802" + }, + { + "output": "imgap_version", + "data_object_type": "Annotation Info File", + "description": "Annotation info for {id}", + "name": "File containing annotation info", + "id": "nmdc:dobj-11-4mr0ae56" + } + ] + }, + "claims": [ + { + "op_id": "nmdc:sys0egpxjn25", + "site_id": "NERSC" + } + ] + }, + { + "workflow": { + "id": "Metagenome Assembly: v1.0.3" + }, + "id": "nmdc:00044f52-833c-11ee-bd0f-067aae39013b", + "created_at": "2023-11-14T22:20:21", + "config": { + "git_repo": "https://github.com/microbiomedata/metaAssembly", + "release": "v1.0.3", + "wdl": "jgi_assembly.wdl", + "activity_id": "nmdc:wfmgas-11-wtz4rz76.1", + "activity_set": "metagenome_assembly_set", + "was_informed_by": "nmdc:omprc-12-5vn1nh02", + "trigger_activity": "nmdc:wfrqc-11-4eethb84.1", + "iteration": 1, + "input_prefix": "jgi_metaASM", + "inputs": { + "input_file": "https://data.microbiomedata.org/data/nmdc:omprc-12-5vn1nh02/nmdc:wfrqc-11-4eethb84.1/nmdc_wfrqc-11-4eethb84.1_filtered.fastq.gz", + "rename_contig_prefix": "nmdc:wfmgas-11-wtz4rz76.1", + "proj": "nmdc:wfmgas-11-wtz4rz76.1" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-mh9dma28", + "name": "nmdc_wfrqc-11-4eethb84.1_filtered.fastq.gz", + "description": "Reads QC for nmdc:wfrqc-11-4eethb84.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-12-5vn1nh02/nmdc:wfrqc-11-4eethb84.1/nmdc_wfrqc-11-4eethb84.1_filtered.fastq.gz", + "md5_checksum": "3e60e2633256727a38a3c3b368a01732", + "file_size_bytes": 2218610459, + "data_object_type": "Filtered Sequencing Reads" + } + ], + "activity": { + "name": "Metagenome Assembly Activity for {id}", + "type": "nmdc:MetagenomeAssembly", + "asm_score": "{outputs.stats.asm_score}", + "contig_bp": "{outputs.stats.contig_bp}", + "contigs": "{outputs.stats.contigs}", + "ctg_l50": "{outputs.stats.ctg_l50}", + "ctg_l90": "{outputs.stats.ctg_l90}", + "ctg_logsum": "{outputs.stats.ctg_logsum}", + "ctg_max": "{outputs.stats.ctg_max}", + "ctg_n50": "{outputs.stats.ctg_n50}", + "ctg_n90": "{outputs.stats.ctg_n90}", + "ctg_powsum": "{outputs.stats.ctg_powsum}", + "gap_pct": "{outputs.stats.gap_pct}", + "gc_avg": "{outputs.stats.gc_avg}", + "gc_std": "{outputs.stats.gc_std}", + "scaf_bp": "{outputs.stats.scaf_bp}", + "scaf_l50": "{outputs.stats.scaf_l50}", + "scaf_l90": "{outputs.stats.scaf_l90}", + "scaf_l_gt50k": "{outputs.stats.scaf_l_gt50k}", + "scaf_logsum": "{outputs.stats.scaf_logsum}", + "scaf_max": "{outputs.stats.scaf_max}", + "scaf_n50": "{outputs.stats.scaf_n50}", + "scaf_n90": "{outputs.stats.scaf_n90}", + "scaf_n_gt50k": "{outputs.stats.scaf_n_gt50k}", + "scaf_pct_gt50k": "{outputs.stats.scaf_pct_gt50k}", + "scaf_powsum": "{outputs.stats.scaf_powsum}", + "scaffolds": "{outputs.stats.scaffolds}" + }, + "outputs": [ + { + "output": "contig", + "name": "Final assembly contigs fasta", + "suffix": "_contigs.fna", + "data_object_type": "Assembly Contigs", + "description": "Assembly contigs for {id}", + "id": "nmdc:dobj-11-xgrxb861" + }, + { + "output": "scaffold", + "name": "Final assembly scaffolds fasta", + "suffix": "_scaffolds.fna", + "data_object_type": "Assembly Scaffolds", + "description": "Assembly scaffolds for {id}", + "id": "nmdc:dobj-11-s2dd2t90" + }, + { + "output": "covstats", + "name": "Assembled contigs coverage information", + "suffix": "_covstats.txt", + "data_object_type": "Assembly Coverage Stats", + "description": "Coverage Stats for {id}", + "id": "nmdc:dobj-11-3bkdmw35" + }, + { + "output": "agp", + "name": "An AGP format file that describes the assembly", + "suffix": "_assembly.agp", + "data_object_type": "Assembly AGP", + "description": "AGP for {id}", + "id": "nmdc:dobj-11-pyrthm29" + }, + { + "output": "bam", + "name": "Sorted bam file of reads mapping back to the final assembly", + "suffix": "_pairedMapped_sorted.bam", + "data_object_type": "Assembly Coverage BAM", + "description": "Sorted Bam for {id}", + "id": "nmdc:dobj-11-f5b4wy79" + }, + { + "output": "asminfo", + "name": "File containing assembly info", + "suffix": "_metaAsm.info", + "data_object_type": "Assembly Info File", + "description": "Assembly info for {id}", + "id": "nmdc:dobj-11-z9hckx95" + } + ] + }, + "claims": [ + { + "op_id": "nmdc:sys0b9ktag76", + "site_id": "NERSC" + } + ] + }, + { + "workflow": { + "id": "Metagenome Annotation: v1.0.4" + }, + "id": "nmdc:005782ec-4081-11ee-9be4-ee3fb66564cb", + "created_at": "2023-08-22T00:15:29", + "config": { + "git_repo": "https://github.com/microbiomedata/mg_annotation", + "release": "v1.0.4", + "wdl": "annotation_full.wdl", + "activity_id": "nmdc:wfmgan-11-5b6pg295.1", + "activity_set": "metagenome_annotation_activity_set", + "was_informed_by": "nmdc:omprc-11-dcd0jq29", + "trigger_activity": "nmdc:wfmgas-11-hdxbp548.1", + "iteration": 1, + "input_prefix": "annotation", + "inputs": { + "input_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-dcd0jq29/nmdc:wfmgas-11-hdxbp548.1/nmdc_wfmgas-11-hdxbp548.1_contigs.fna", + "imgap_project_id": "scaffold", + "proj": "nmdc:wfmgan-11-5b6pg295.1" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-598qwk38", + "name": "nmdc_wfmgas-11-hdxbp548.1_contigs.fna", + "description": "Assembly contigs for nmdc:wfmgas-11-hdxbp548.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-dcd0jq29/nmdc:wfmgas-11-hdxbp548.1/nmdc_wfmgas-11-hdxbp548.1_contigs.fna", + "md5_checksum": "4e6d4fcc9f330f6a616ddb9595ef9509", + "file_size_bytes": 18918050, + "data_object_type": "Assembly Contigs" + } + ], + "activity": { + "name": "Metagenome Annotation Analysis Activity for {id}", + "type": "nmdc:MetagenomeAnnotationActivity" + }, + "outputs": [ + { + "output": "proteins_faa", + "data_object_type": "Annotation Amino Acid FASTA", + "description": "FASTA Amino Acid File for {id}", + "name": "FASTA amino acid file for annotated proteins", + "suffix": "_proteins.faa", + "id": "nmdc:dobj-11-a1r5a733" + }, + { + "output": "structural_gff", + "data_object_type": "Structural Annotation GFF", + "description": "Structural Annotation for {id}", + "name": "GFF3 format file with structural annotations", + "suffix": "_structural_annotation.gff", + "id": "nmdc:dobj-11-tqtycq13" + }, + { + "output": "functional_gff", + "data_object_type": "Functional Annotation GFF", + "description": "Functional Annotation for {id}", + "name": "GFF3 format file with functional annotations", + "suffix": "_functional_annotation.gff", + "id": "nmdc:dobj-11-f97mze63" + }, + { + "output": "ko_tsv", + "data_object_type": "Annotation KEGG Orthology", + "description": "KEGG Orthology for {id}", + "name": "Tab delimited file for KO annotation", + "suffix": "_ko.tsv", + "id": "nmdc:dobj-11-3tgvxd08" + }, + { + "output": "ec_tsv", + "data_object_type": "Annotation Enzyme Commission", + "description": "EC Annotations for {id}", + "name": "Tab delimited file for EC annotation", + "suffix": "_ec.tsv", + "id": "nmdc:dobj-11-zxcnx432" + }, + { + "output": "lineage_tsv", + "data_object_type": "Scaffold Lineage tsv", + "description": "Scaffold Lineage tsv for {id}", + "name": "Phylogeny at the scaffold level", + "suffix": "_scaffold_lineage.tsv", + "id": "nmdc:dobj-11-zj305709" + }, + { + "output": "cog_gff", + "data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF", + "description": "COGs for {id}", + "name": "GFF3 format file with COGs", + "suffix": "_cog.gff", + "id": "nmdc:dobj-11-5fxvrv97" + }, + { + "output": "pfam_gff", + "data_object_type": "Pfam Annotation GFF", + "description": "Pfam Annotation for {id}", + "name": "GFF3 format file with Pfam", + "suffix": "_pfam.gff", + "id": "nmdc:dobj-11-9z6ah352" + }, + { + "output": "tigrfam_gff", + "data_object_type": "TIGRFam Annotation GFF", + "description": "TIGRFam for {id}", + "name": "GFF3 format file with TIGRfam", + "suffix": "_tigrfam.gff", + "id": "nmdc:dobj-11-yhfecr06" + }, + { + "output": "smart_gff", + "data_object_type": "SMART Annotation GFF", + "description": "SMART Annotations for {id}", + "name": "GFF3 format file with SMART", + "suffix": "_smart.gff", + "id": "nmdc:dobj-11-9gp4da96" + }, + { + "output": "supfam_gff", + "data_object_type": "SUPERFam Annotation GFF", + "description": "SUPERFam Annotations for {id}", + "name": "GFF3 format file with SUPERFam", + "suffix": "_supfam.gff", + "id": "nmdc:dobj-11-b54ak435" + }, + { + "output": "cath_funfam_gff", + "data_object_type": "CATH FunFams (Functional Families) Annotation GFF", + "description": "CATH FunFams for {id}", + "name": "GFF3 format file with CATH FunFams", + "suffix": "_cath_funfam.gff", + "id": "nmdc:dobj-11-r3b6fh65" + }, + { + "output": "crt_gff", + "data_object_type": "CRT Annotation GFF", + "description": "CRT Annotations for {id}", + "name": "GFF3 format file with CRT", + "suffix": "_crt.gff", + "id": "nmdc:dobj-11-s1770x66" + }, + { + "output": "genemark_gff", + "data_object_type": "Genemark Annotation GFF", + "description": "Genemark Annotations for {id}", + "name": "GFF3 format file with Genemark", + "suffix": "_genemark.gff", + "id": "nmdc:dobj-11-tfskm895" + }, + { + "output": "prodigal_gff", + "data_object_type": "Prodigal Annotation GFF", + "description": "Prodigal Annotations {id}", + "name": "GFF3 format file with Prodigal", + "suffix": "_prodigal.gff", + "id": "nmdc:dobj-11-0vt22n49" + }, + { + "output": "trna_gff", + "data_object_type": "TRNA Annotation GFF", + "description": "TRNA Annotations {id}", + "name": "GFF3 format file with TRNA", + "suffix": "_trna.gff", + "id": "nmdc:dobj-11-g1z3e990" + }, + { + "output": "final_rfam_gff", + "data_object_type": "RFAM Annotation GFF", + "description": "RFAM Annotations for {id}", + "name": "GFF3 format file with RFAM", + "suffix": "_rfam.gff", + "id": "nmdc:dobj-11-dpz65681" + }, + { + "output": "ko_ec_gff", + "data_object_type": "KO_EC Annotation GFF", + "description": "KO_EC Annotations for {id}", + "name": "GFF3 format file with KO_EC", + "suffix": "_ko_ec.gff", + "id": "nmdc:dobj-11-s64gp211" + }, + { + "output": "product_names_tsv", + "data_object_type": "Product Names", + "description": "Product names for {id}", + "name": "Product names file", + "suffix": "_product_names.tsv", + "id": "nmdc:dobj-11-7dfvzs81" + }, + { + "output": "gene_phylogeny_tsv", + "data_object_type": "Gene Phylogeny tsv", + "description": "Gene Phylogeny for {id}", + "name": "Gene Phylogeny file", + "suffix": "_gene_phylogeny.tsv", + "id": "nmdc:dobj-11-hagw0713" + }, + { + "output": "crt_crisprs", + "data_object_type": "Crispr Terms", + "description": "Crispr Terms for {id}", + "name": "Crispr Terms", + "suffix": "_crt.crisprs", + "id": "nmdc:dobj-11-nsnye718" + }, + { + "output": "stats_tsv", + "data_object_type": "Annotation Statistics", + "description": "Annotation Stats for {id}", + "name": "Annotation statistics report", + "suffix": "_stats.tsv", + "id": "nmdc:dobj-11-naqp2149" + }, + { + "output": "imgap_version", + "data_object_type": "Annotation Info File", + "description": "Annotation info for {id}", + "name": "File containing annotation info", + "suffix": "_imgap.info", + "id": "nmdc:dobj-11-z7vpmz16" + } + ] + }, + "claims": [ + { + "op_id": "nmdc:sys0dxyztg13", + "site_id": "NERSC" + } + ] + } + ], + "next_page_token": "nmdc:sys0c1zcq972" +} \ No newline at end of file diff --git a/tests/fixtures/mags_config.yaml b/tests/fixtures/mags_config.yaml new file mode 100644 index 00000000..f9bf02fb --- /dev/null +++ b/tests/fixtures/mags_config.yaml @@ -0,0 +1,76 @@ + - Name: MAGs + Type: nmdc:MagsAnalysis + Enabled: True + Analyte Category: Metagenome + Git_repo: https://github.com/microbiomedata/metaMAGs + Version: v1.3.10 + WDL: mbin_nmdc.wdl + Collection: workflow_execution_set + Predecessors: + - Metagenome Annotation + Input_prefix: nmdc_mags + Inputs: + proj: "{workflow_execution_id}" + contig_file: do:Assembly Contigs + sam_file: do:Assembly Coverage BAM + gff_file: do:Functional Annotation GFF + proteins_file: do:Annotation Amino Acid FASTA + cog_file: do:Clusters of Orthologous Groups (COG) Annotation GFF + ec_file: do:Annotation Enzyme Commission + ko_file: do:Annotation KEGG Orthology + pfam_file: do:Pfam Annotation GFF + tigrfam_file: do:TIGRFam Annotation GFF + crispr_file: do:Crispr Terms + product_names_file: do:Product Names + gene_phylogeny_file: do:Gene Phylogeny tsv + lineage_file: do:Scaffold Lineage tsv + map_file: do:Contig Mapping File + Optional Inputs: + - map_file + Workflow Execution: + name: "Metagenome Assembled Genomes Analysis for {id}" + type: nmdc:MagsAnalysis + binned_contig_num: "{outputs.final_stats_json.binned_contig_num}" + input_contig_num: "{outputs.final_stats_json.input_contig_num}" + low_depth_contig_num: "{outputs.final_stats_json.low_depth_contig_num}" + mags_list: "{outputs.final_stats_json.mags_list}" + too_short_contig_num: "{outputs.final_stats_json.too_short_contig_num}" + unbinned_contig_num: "{outputs.final_stats_json.unbinned_contig_num}" + Outputs: + - output: final_checkm + data_object_type: CheckM Statistics + description: CheckM for {id} + name: CheckM statistics report + - output: final_hqmq_bins_zip + data_object_type: Metagenome HQMQ Bins Compression File + description: Metagenome HQMQ Bins for {id} + name: Metagenome hqmq bin zip archive + - output: final_gtdbtk_bac_summary + data_object_type: GTDBTK Bacterial Summary + description: Bacterial Summary for {id} + name: GTDBTK bacterial summary + - output: final_gtdbtk_ar_summary + data_object_type: GTDBTK Archaeal Summary + description: Archaeal Summary for {id} + name: GTDBTK archaeal summary + suffix: _gtdbtk.ar122.summary.tsv + - output: mags_version + data_object_type: Metagenome Bins Info File + description: Metagenome Bins Info File for {id} + name: Metagenome Bins Info File + - output: final_lq_bins_zip + data_object_type: Metagenome LQ Bins Compression File + description: Metagenome LQ Bins for {id} + name: Metagenome lq bin zip archive + - output: heatmap + data_object_type: Metagenome Bins Heatmap + description: Metagenome heatmap for {id} + name: Metagenome Heatmap File + - output: barplot + data_object_type: Metagenome Bins Barplot + description: Metagenome barplot for {id} + name: Metagenome Barplot File + - output: kronaplot + data_object_type: Metagenome Bins Krona Plot + description: Metagenome Bins Krona Plot for {id} + name: Metagenome Krona Bins Plot File \ No newline at end of file diff --git a/tests/fixtures/mags_final_stats.json b/tests/fixtures/mags_final_stats.json new file mode 100644 index 00000000..2c1e53f3 --- /dev/null +++ b/tests/fixtures/mags_final_stats.json @@ -0,0 +1,189 @@ +{ + "input_contig_num": 2273412, + "too_short_contig_num": 2005162, + "low_depth_contig_num": 0, + "unbinned_contig_num": 241036, + "binned_contig_num": 27214, + "mags_list": [ + { + "bin_name": "bins.40", + "number_of_contig": 44, + "completeness": 97.3, + "contamination": 3.38, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "MQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "Bacteria", + "gtdbtk_phylum": "Verrucomicrobiota", + "gtdbtk_class": "Verrucomicrobiae", + "gtdbtk_order": "Pedosphaerales", + "gtdbtk_family": "UBA11358", + "gtdbtk_genus": "UBA11358", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7_c1", + "nmdc:wfmgas-13-56028x05.1_9_c1", + "nmdc:wfmgas-13-56028x05.1_16_c1", + "nmdc:wfmgas-13-56028x05.1_20_c1", + "nmdc:wfmgas-13-56028x05.1_23_c1", + "nmdc:wfmgas-13-56028x05.1_27_c1", + "nmdc:wfmgas-13-56028x05.1_45_c1", + "nmdc:wfmgas-13-56028x05.1_55_c1", + "nmdc:wfmgas-13-56028x05.1_71_c1", + "nmdc:wfmgas-13-56028x05.1_79_c1", + "nmdc:wfmgas-13-56028x05.1_99_c1", + "nmdc:wfmgas-13-56028x05.1_52_c2", + "nmdc:wfmgas-13-56028x05.1_127_c1", + "nmdc:wfmgas-13-56028x05.1_131_c1", + "nmdc:wfmgas-13-56028x05.1_137_c1", + "nmdc:wfmgas-13-56028x05.1_169_c1", + "nmdc:wfmgas-13-56028x05.1_200_c1", + "nmdc:wfmgas-13-56028x05.1_212_c1", + "nmdc:wfmgas-13-56028x05.1_223_c1", + "nmdc:wfmgas-13-56028x05.1_372_c1", + "nmdc:wfmgas-13-56028x05.1_393_c1", + "nmdc:wfmgas-13-56028x05.1_428_c1", + "nmdc:wfmgas-13-56028x05.1_52_c1", + "nmdc:wfmgas-13-56028x05.1_582_c1", + "nmdc:wfmgas-13-56028x05.1_706_c1", + "nmdc:wfmgas-13-56028x05.1_888_c1", + "nmdc:wfmgas-13-56028x05.1_912_c1", + "nmdc:wfmgas-13-56028x05.1_1268_c1", + "nmdc:wfmgas-13-56028x05.1_1271_c1", + "nmdc:wfmgas-13-56028x05.1_1492_c1", + "nmdc:wfmgas-13-56028x05.1_1494_c1", + "nmdc:wfmgas-13-56028x05.1_1604_c1", + "nmdc:wfmgas-13-56028x05.1_1627_c1", + "nmdc:wfmgas-13-56028x05.1_1888_c1", + "nmdc:wfmgas-13-56028x05.1_1938_c1", + "nmdc:wfmgas-13-56028x05.1_2944_c1", + "nmdc:wfmgas-13-56028x05.1_3261_c1", + "nmdc:wfmgas-13-56028x05.1_3477_c1", + "nmdc:wfmgas-13-56028x05.1_4194_c1", + "nmdc:wfmgas-13-56028x05.1_6257_c1", + "nmdc:wfmgas-13-56028x05.1_7589_c1", + "nmdc:wfmgas-13-56028x05.1_10469_c1", + "nmdc:wfmgas-13-56028x05.1_10553_c1", + "nmdc:wfmgas-13-56028x05.1_13792_c1" + ] + }, + { + "bin_name": "bins.9", + "number_of_contig": 92, + "completeness": 0.0, + "contamination": 0.0, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "LQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "null", + "gtdbtk_phylum": "null", + "gtdbtk_class": "null", + "gtdbtk_order": "null", + "gtdbtk_family": "null", + "gtdbtk_genus": "null", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7094_c1", + "nmdc:wfmgas-13-56028x05.1_9486_c1", + "nmdc:wfmgas-13-56028x05.1_9853_c1", + "nmdc:wfmgas-13-56028x05.1_10857_c1", + "nmdc:wfmgas-13-56028x05.1_11702_c1", + "nmdc:wfmgas-13-56028x05.1_12042_c1", + "nmdc:wfmgas-13-56028x05.1_14174_c1", + "nmdc:wfmgas-13-56028x05.1_14597_c1", + "nmdc:wfmgas-13-56028x05.1_16115_c1", + "nmdc:wfmgas-13-56028x05.1_16261_c1", + "nmdc:wfmgas-13-56028x05.1_16795_c1", + "nmdc:wfmgas-13-56028x05.1_16943_c1", + "nmdc:wfmgas-13-56028x05.1_17208_c1", + "nmdc:wfmgas-13-56028x05.1_17245_c1", + "nmdc:wfmgas-13-56028x05.1_17383_c1", + "nmdc:wfmgas-13-56028x05.1_17783_c1", + "nmdc:wfmgas-13-56028x05.1_18468_c1", + "nmdc:wfmgas-13-56028x05.1_18553_c1", + "nmdc:wfmgas-13-56028x05.1_18858_c1", + "nmdc:wfmgas-13-56028x05.1_19302_c1", + "nmdc:wfmgas-13-56028x05.1_19824_c1", + "nmdc:wfmgas-13-56028x05.1_20316_c1", + "nmdc:wfmgas-13-56028x05.1_20787_c1", + "nmdc:wfmgas-13-56028x05.1_21029_c1", + "nmdc:wfmgas-13-56028x05.1_21435_c1", + "nmdc:wfmgas-13-56028x05.1_21475_c1", + "nmdc:wfmgas-13-56028x05.1_21484_c1", + "nmdc:wfmgas-13-56028x05.1_21518_c1", + "nmdc:wfmgas-13-56028x05.1_21685_c1", + "nmdc:wfmgas-13-56028x05.1_21809_c1", + "nmdc:wfmgas-13-56028x05.1_21924_c1", + "nmdc:wfmgas-13-56028x05.1_21958_c1", + "nmdc:wfmgas-13-56028x05.1_22186_c1", + "nmdc:wfmgas-13-56028x05.1_22271_c1", + "nmdc:wfmgas-13-56028x05.1_22516_c1", + "nmdc:wfmgas-13-56028x05.1_22514_c1", + "nmdc:wfmgas-13-56028x05.1_22777_c1", + "nmdc:wfmgas-13-56028x05.1_23003_c1", + "nmdc:wfmgas-13-56028x05.1_23115_c1", + "nmdc:wfmgas-13-56028x05.1_23204_c1", + "nmdc:wfmgas-13-56028x05.1_23239_c1", + "nmdc:wfmgas-13-56028x05.1_23352_c1", + "nmdc:wfmgas-13-56028x05.1_23445_c1", + "nmdc:wfmgas-13-56028x05.1_23505_c1", + "nmdc:wfmgas-13-56028x05.1_23571_c1", + "nmdc:wfmgas-13-56028x05.1_24047_c1", + "nmdc:wfmgas-13-56028x05.1_24749_c1", + "nmdc:wfmgas-13-56028x05.1_24981_c1", + "nmdc:wfmgas-13-56028x05.1_25059_c1", + "nmdc:wfmgas-13-56028x05.1_25526_c1", + "nmdc:wfmgas-13-56028x05.1_26162_c1", + "nmdc:wfmgas-13-56028x05.1_26376_c1", + "nmdc:wfmgas-13-56028x05.1_26773_c1", + "nmdc:wfmgas-13-56028x05.1_26816_c1", + "nmdc:wfmgas-13-56028x05.1_26891_c1", + "nmdc:wfmgas-13-56028x05.1_27179_c1", + "nmdc:wfmgas-13-56028x05.1_27272_c1", + "nmdc:wfmgas-13-56028x05.1_27358_c1", + "nmdc:wfmgas-13-56028x05.1_27411_c1", + "nmdc:wfmgas-13-56028x05.1_27550_c1", + "nmdc:wfmgas-13-56028x05.1_28892_c1", + "nmdc:wfmgas-13-56028x05.1_29003_c1", + "nmdc:wfmgas-13-56028x05.1_29238_c1", + "nmdc:wfmgas-13-56028x05.1_29324_c1", + "nmdc:wfmgas-13-56028x05.1_29771_c1", + "nmdc:wfmgas-13-56028x05.1_29878_c1", + "nmdc:wfmgas-13-56028x05.1_30248_c1", + "nmdc:wfmgas-13-56028x05.1_30476_c1", + "nmdc:wfmgas-13-56028x05.1_30587_c1", + "nmdc:wfmgas-13-56028x05.1_31160_c1", + "nmdc:wfmgas-13-56028x05.1_31834_c1", + "nmdc:wfmgas-13-56028x05.1_31922_c1", + "nmdc:wfmgas-13-56028x05.1_31971_c1", + "nmdc:wfmgas-13-56028x05.1_32244_c1", + "nmdc:wfmgas-13-56028x05.1_32605_c1", + "nmdc:wfmgas-13-56028x05.1_32623_c1", + "nmdc:wfmgas-13-56028x05.1_32832_c1", + "nmdc:wfmgas-13-56028x05.1_33068_c1", + "nmdc:wfmgas-13-56028x05.1_33334_c1", + "nmdc:wfmgas-13-56028x05.1_33438_c1", + "nmdc:wfmgas-13-56028x05.1_33855_c1", + "nmdc:wfmgas-13-56028x05.1_34035_c1", + "nmdc:wfmgas-13-56028x05.1_34120_c1", + "nmdc:wfmgas-13-56028x05.1_34140_c1", + "nmdc:wfmgas-13-56028x05.1_34133_c1", + "nmdc:wfmgas-13-56028x05.1_34177_c1", + "nmdc:wfmgas-13-56028x05.1_34481_c1", + "nmdc:wfmgas-13-56028x05.1_34728_c1", + "nmdc:wfmgas-13-56028x05.1_34843_c1", + "nmdc:wfmgas-13-56028x05.1_35665_c1", + "nmdc:wfmgas-13-56028x05.1_35772_c1", + "nmdc:wfmgas-13-56028x05.1_35995_c1" + ] + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/mags_job_metadata.json b/tests/fixtures/mags_job_metadata.json new file mode 100644 index 00000000..ed705cff --- /dev/null +++ b/tests/fixtures/mags_job_metadata.json @@ -0,0 +1,1010 @@ +{ + "workflowName": "nmdc_mags", + "workflowProcessingEvents": [ + { + "cromwellId": "cromid-083a56f", + "description": "PickedUp", + "timestamp": "2024-07-01T16:54:56.053Z", + "cromwellVersion": "77" + }, + { + "cromwellId": "cromid-083a56f", + "description": "Finished", + "timestamp": "2024-07-01T19:55:38.766Z", + "cromwellVersion": "77" + } + ], + "actualWorkflowLanguageVersion": "1.0", + "submittedFiles": { + "workflow": "version 1.0\nworkflow nmdc_mags {\n input {\n String proj\n String contig_file\n String sam_file\n String gff_file\n String proteins_file\n String cog_file\n String ec_file\n String ko_file\n String pfam_file\n String tigrfam_file\n String cath_funfam_file\n String smart_file\n String supfam_file\n String product_names_file\n String gene_phylogeny_file\n String lineage_file\n File? map_file\n String? scratch_dir\n Int cpu=32\n Int threads=64\n Int pthreads=1\n String gtdbtk_db=\"/refdata/GTDBTK_DB/gtdbtk_release207_v2\"\n String checkm_db=\"/refdata/checkM_DB/checkm_data_2015_01_16\"\n String eukcc2_db=\"/refdata/EUKCC2_DB/eukcc2_db_ver_1.2\"\n String package_container = \"microbiomedata/nmdc_mbin_vis:0.2.0\"\n String container = \"microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349\"\n }\n call stage {\n input:\n container=container,\n contig_file=contig_file,\n sam_file=sam_file,\n gff_file=gff_file,\n proteins_file=proteins_file,\n cog_file=cog_file,\n ec_file=ec_file,\n ko_file=ko_file,\n pfam_file=pfam_file,\n tigrfam_file=tigrfam_file,\n cath_funfam_file=cath_funfam_file,\n smart_file=smart_file,\n supfam_file=supfam_file,\n product_names_file=product_names_file,\n gene_phylogeny_file=gene_phylogeny_file,\n lineage_file=lineage_file\n }\n\n call mbin_nmdc {\n input: \n name=proj,\n fna = stage.contig,\n aln = stage.sam,\n gff = stage.gff,\n lineage=stage.lineage_tsv,\n threads = threads,\n pthreads = pthreads,\n gtdbtk_env = gtdbtk_db,\n checkm_env = checkm_db,\n eukcc2_env = eukcc2_db,\n map_file = map_file,\n mbin_container = container\n }\n call package {\n input: proj = proj,\n bins=flatten([mbin_nmdc.hqmq_bin_fasta_files,mbin_nmdc.lq_bin_fasta_files]),\n json_stats=mbin_nmdc.stats_json,\n gff_file=stage.gff,\n proteins_file=stage.proteins,\n cog_file=stage.cog,\n ec_file=stage.ec,\n ko_file=stage.ko,\n pfam_file=stage.pfam,\n tigrfam_file=stage.tigrfam,\n cath_funfam_file=stage.cath_funfam,\n smart_file=stage.smart,\n supfam_file=stage.supfam,\n product_names_file=stage.product_names,\n container=package_container\n }\n\n call finish_mags {\n input:\n container=\"microbiomedata/workflowmeta:1.1.1\",\n contigs=stage.contig,\n anno_gff=stage.gff,\n sorted_bam=stage.sam,\n proj=proj,\n start=stage.start,\n checkm = mbin_nmdc.checkm,\n bacsum= mbin_nmdc.bacsum,\n arcsum = mbin_nmdc.arcsum,\n short = mbin_nmdc.short,\n low = mbin_nmdc.low,\n unbinned = mbin_nmdc.unbinned,\n checkm = mbin_nmdc.checkm,\n mbin_sdb = mbin_nmdc.mbin_sdb,\n mbin_version = mbin_nmdc.mbin_version,\n stats_json = mbin_nmdc.stats_json,\n stats_tsv = mbin_nmdc.stats_tsv,\n hqmq_bin_fasta_files = mbin_nmdc.hqmq_bin_fasta_files,\n bin_fasta_files = mbin_nmdc.lq_bin_fasta_files,\n hqmq_bin_tarfiles = package.hqmq_bin_tarfiles,\n lq_bin_tarfiles = package.lq_bin_tarfiles,\n barplot = package.barplot,\n heatmap = package.heatmap,\n kronaplot = package.kronaplot,\n eukcc_file=mbin_nmdc.eukcc_csv,\n ko_matrix = package.ko_matrix\n }\n\n output {\n File final_hqmq_bins_zip = finish_mags.final_hqmq_bins_zip\n File final_lq_bins_zip = finish_mags.final_lq_bins_zip\n File final_gtdbtk_bac_summary = finish_mags.final_gtdbtk_bac_summary\n File final_gtdbtk_ar_summary = finish_mags.final_gtdbtk_ar_summary\n File short = finish_mags.final_short\n File low = finish_mags.final_lowDepth_fa\n File final_unbinned_fa = finish_mags.final_unbinned_fa\n File final_checkm = finish_mags.final_checkm\n File mags_version = finish_mags.final_version\n File final_stats_json = finish_mags.final_stats_json\n File barplot = finish_mags.final_barplot\n File heatmap = finish_mags.final_heatmap\n File kronaplot = finish_mags.final_kronaplot\n }\n\n\n}\n\ntask mbin_nmdc {\n input{\n File fna\n File aln\n File gff\n File lineage\n String name\n File? map_file\n Int? threads\n Int? pthreads\n String gtdbtk_env\n String checkm_env\n\t String? eukcc2_env\n String mbin_container\n }\n\n command<<<\n set -euo pipefail\n export GTDBTK_DATA_PATH=~{gtdbtk_env}\n export CHECKM_DATA_PATH=~{checkm_env}\n mbin.py ~{\"--threads \" + threads} ~{\"--pthreads \" + pthreads} ~{\"--map \" + map_file} ~{\"--eukccdb \" + eukcc2_env} --fna ~{fna} --gff ~{gff} --aln ~{aln} --lintsv ~{lineage}\n mbin_stats.py $PWD\n mbin_versions.py > mbin_nmdc_versions.log\n touch MAGs_stats.tsv\n \n if [ -f gtdbtk-output/gtdbtk.bac120.summary.tsv ]; then\n echo \"bacterial summary exists.\"\n else\n mkdir -p gtdbtk-output\n echo \"No Bacterial Results for ~{name}\" > gtdbtk-output/gtdbtk.bac120.summary.tsv\n fi\n\n if [ -f gtdbtk-output/gtdbtk.ar122.summary.tsv ]; then\n echo \"archaeal summary exists.\"\n else\n mkdir -p gtdbtk-output\n echo \"No Archaeal Results for ~{name}\" > gtdbtk-output/gtdbtk.ar122.summary.tsv\n fi\n\n if [ -f checkm-qa.out ]; then\n echo \"checkm summary exists.\"\n else\n mkdir -p gtdbtk-output\n echo \"No Checkm Results for ~{name}\" > checkm-qa.out\n fi\n\n if [ -f mbin.sdb ]; then\n echo \"mbin.sdb exists.\"\n else\n mkdir -p gtdbtk-output\n echo \"Mbin Sdb Could not be created for ~{name}\" > mbin.sdb\n fi\n\n if [ -f eukcc_output/eukcc.csv.final ]; then\n echo \"eukcc.csv.final exists.\"\n else\n mkdir -p eukcc_output\n echo \"No EUKCC2 result for ~{name}\" > eukcc_output/eukcc.csv.final\n fi\n >>>\n\n runtime{\n docker : mbin_container\n memory : \"120 G\"\n\t time : \"2:00:00\"\n cpu : threads\n }\n\n output{\n File short = \"bins.tooShort.fa\"\n File low = \"bins.lowDepth.fa\"\n File unbinned = \"bins.unbinned.fa\"\n File checkm = \"checkm-qa.out\"\n File stats_json = \"MAGs_stats.json\"\n File stats_tsv = \"MAGs_stats.tsv\"\n File mbin_sdb = \"mbin.sdb\"\n File mbin_version = \"mbin_nmdc_versions.log\"\n File bacsum = \"gtdbtk-output/gtdbtk.bac120.summary.tsv\"\n File arcsum = \"gtdbtk-output/gtdbtk.ar122.summary.tsv\"\n\t File eukcc_csv = \"eukcc_output/eukcc.csv.final\"\n Array[File] hqmq_bin_fasta_files = glob(\"hqmq-metabat-bins/*fa\")\n Array[File] lq_bin_fasta_files = glob(\"filtered-metabat-bins/*fa\")\n } \n}\n\n\ntask stage {\n input{\n String container\n String contig_file\n String sam_file\n String gff_file\n String proteins_file\n String cog_file\n String ec_file\n String ko_file\n String pfam_file\n String tigrfam_file\n String cath_funfam_file\n String smart_file\n String supfam_file\n String product_names_file\n String gene_phylogeny_file\n String lineage_file\n String contigs_out=\"contigs.fasta\"\n String bam_out=\"pairedMapped_sorted.bam\"\n String gff_out=\"functional_annotation.gff\"\n String proteins_out=\"proteins.faa\"\n String cog_out=\"cog.gff\"\n String ec_out=\"ec.tsv\"\n String ko_out=\"ko.tsv\"\n String pfam_out=\"pfam.gff\"\n String tigrfam_out=\"tigrfam.gff\"\n String cath_funfam_out=\"cath_funfam.gff\"\n String smart_out=\"smart.gff\"\n String supfam_out=\"supfam.gff\"\n String products_out=\"products.tsv\"\n String gene_phylogeny_out=\"gene_phylogeny.tsv\"\n String lineage_out=\"lineage.tsv\"\n }\n command<<<\n\n set -e\n\n function stage() {\n in=$1\n out=$2\n if [ $( echo $in |egrep -c \"https*:\") -gt 0 ] ; then\n wget $in -O $out\n else\n ln $in $out || cp $in $out\n fi\n }\n\n stage ~{contig_file} ~{contigs_out}\n stage ~{sam_file} ~{bam_out}\n stage ~{gff_file} ~{gff_out}\n stage ~{proteins_file} ~{proteins_out}\n stage ~{cog_file} ~{cog_out}\n stage ~{ec_file} ~{ec_out}\n stage ~{ko_file} ~{ko_out}\n stage ~{pfam_file} ~{pfam_out}\n stage ~{tigrfam_file} ~{tigrfam_out}\n stage ~{cath_funfam_file} ~{cath_funfam_out}\n stage ~{smart_file} ~{smart_out}\n stage ~{supfam_file} ~{supfam_out}\n stage ~{product_names_file} ~{products_out}\n stage ~{gene_phylogeny_file} ~{gene_phylogeny_out}\n stage ~{lineage_file} ~{lineage_out}\n\n date --iso-8601=seconds > start.txt\n\n >>>\n\n output{\n File contig = \"contigs.fasta\"\n File sam = \"pairedMapped_sorted.bam\"\n File gff = \"functional_annotation.gff\"\n File proteins = \"proteins.faa\"\n File cog = \"cog.gff\"\n File ec = \"ec.tsv\"\n File ko = \"ko.tsv\"\n File pfam = \"pfam.gff\"\n File tigrfam = \"tigrfam.gff\"\n File cath_funfam = \"cath_funfam.gff\"\n File smart = \"smart.gff\"\n File supfam = \"supfam.gff\"\n File product_names = \"products.tsv\"\n File gene_phylogeny = \"gene_phylogeny.tsv\"\n File lineage_tsv = \"lineage.tsv\"\n String start = read_string(\"start.txt\")\n }\n runtime {\n memory: \"1 GiB\"\n cpu: 2\n maxRetries: 1\n docker: container\n }\n}\n\n\ntask package{\n input{\n String proj\n String prefix=sub(proj, \":\", \"_\")\n Array[File] bins\n File json_stats\n File gff_file\n File proteins_file\n File cog_file\n File ec_file\n File ko_file\n File pfam_file\n File tigrfam_file\n File cath_funfam_file\n File smart_file\n File supfam_file\n File product_names_file\n String container \n }\n command<<<\n set -e\n create_tarfiles.py ~{prefix} \\\n ~{json_stats} ~{gff_file} ~{proteins_file} ~{cog_file} \\\n ~{ec_file} ~{ko_file} ~{pfam_file} ~{tigrfam_file} \\\n ~{cath_funfam_file} ~{smart_file} ~{supfam_file} \\\n ~{product_names_file} \\\n ~{sep=\" \" bins}\n\n if [ -f ~{prefix}_heatmap.pdf ]; then\n echo \"KO analysis plot exists.\"\n else\n echo \"No KO analysis result for ~{proj}\" > ~{prefix}_heatmap.pdf\n echo \"No KO analysis result for ~{proj}\" > ~{prefix}_barplot.pdf\n echo \"No KO analysis result for ~{proj}\" > ~{prefix}_ko_krona.html\n echo \"No KO analysis result for ~{proj}\" > ~{prefix}_module_completeness.tab\n fi\n >>>\n output {\n Array[File] hqmq_bin_tarfiles = flatten([glob(\"*_HQ.tar.gz\"), glob(\"*_MQ.tar.gz\")])\n Array[File] lq_bin_tarfiles = glob(\"*_LQ.tar.gz\") \n File barplot = prefix + \"_barplot.pdf\"\n File heatmap = prefix + \"_heatmap.pdf\"\n File kronaplot = prefix + \"_ko_krona.html\"\n File ko_matrix = prefix + \"_module_completeness.tab\"\n }\n runtime {\n docker: container\n memory: \"1 GiB\"\n cpu: 1\n }\n}\n\ntask finish_mags {\n input{\n String container\n File contigs\n File anno_gff\n File sorted_bam\n File mbin_sdb\n File mbin_version\n String proj\n String prefix=sub(proj, \":\", \"_\")\n String start\n File bacsum\n File arcsum\n File? short\n File? low\n File? unbinned\n File? checkm\n Array[File] hqmq_bin_fasta_files\n Array[File] bin_fasta_files\n Array[File] hqmq_bin_tarfiles\n Array[File] lq_bin_tarfiles\n File stats_json\n File stats_tsv\n Int n_hqmq=length(hqmq_bin_tarfiles)\n Int n_lq=length(lq_bin_tarfiles)\n File barplot\n File heatmap\n File kronaplot\n File ko_matrix\n File eukcc_file\n }\n command<<<\n set -e\n end=`date --iso-8601=seconds`\n\n ln ~{low} ~{prefix}_bins.lowDepth.fa\n ln ~{short} ~{prefix}_bins.tooShort.fa\n ln ~{unbinned} ~{prefix}_bins.unbinned.fa\n ln ~{checkm} ~{prefix}_checkm_qa.out\n ln ~{mbin_version} ~{prefix}_bin.info\n ln ~{bacsum} ~{prefix}_gtdbtk.bac122.summary.tsv\n ln ~{arcsum} ~{prefix}_gtdbtk.ar122.summary.tsv\n ln ~{barplot} ~{prefix}_barplot.pdf\n ln ~{heatmap} ~{prefix}_heatmap.pdf\n ln ~{kronaplot} ~{prefix}_kronaplot.html\n ln ~{ko_matrix} ~{prefix}_ko_matrix.txt\n\n # cp all tarfiles, zip them under prefix, if empty touch no_mags.txt\n mkdir -p hqmq\n if [ ~{n_hqmq} -gt 0 ] ; then\n (cd hqmq && cp ~{sep=\" \" hqmq_bin_tarfiles} .)\n (cd hqmq && cp ~{mbin_sdb} .)\n (cd hqmq && zip -j ../~{prefix}_hqmq_bin.zip *tar.gz mbin.sdb ../*pdf ../*kronaplot.html ../*ko_matrix.txt)\n else\n (cd hqmq && touch no_hqmq_mags.txt)\n (cd hqmq && cp ~{mbin_sdb} .)\n (cd hqmq && zip ../~{prefix}_hqmq_bin.zip *.txt mbin.sdb)\n fi\n\n mkdir -p lq\n if [ ~{n_lq} -gt 0 ] ; then\n (cd lq && cp ~{sep=\" \" lq_bin_tarfiles} .)\n (cd lq && cp ~{mbin_sdb} .)\n (cd lq && zip -j ../~{prefix}_lq_bin.zip *tar.gz mbin.sdb ~{eukcc_file} ../*pdf ../*kronaplot.html ../*ko_matrix.txt)\n else\n (cd lq && touch no_lq_mags.txt)\n (cd lq && cp ~{mbin_sdb} .)\n (cd lq && zip ../~{prefix}_lq_bin.zip *.txt mbin.sdb ~{eukcc_file} )\n fi\n\n # Fix up attribute name\n cat ~{stats_json} | \\\n sed 's/: null/: \"null\"/g' | \\\n sed 's/lowDepth_/low_depth_/' > ~{prefix}_mags_stats.json\n\n >>>\n\n output {\n File final_checkm = \"~{prefix}_checkm_qa.out\"\n File final_hqmq_bins_zip = \"~{prefix}_hqmq_bin.zip\"\n File final_lq_bins_zip = \"~{prefix}_lq_bin.zip\"\n File final_stats_json = \"~{prefix}_mags_stats.json\"\n File final_gtdbtk_bac_summary = \"~{prefix}_gtdbtk.bac122.summary.tsv\"\n File final_gtdbtk_ar_summary = \"~{prefix}_gtdbtk.ar122.summary.tsv\"\n File final_lowDepth_fa = \"~{prefix}_bins.lowDepth.fa\"\n File final_unbinned_fa = \"~{prefix}_bins.unbinned.fa\"\n File final_short = \"~{prefix}_bins.tooShort.fa\"\n File final_version = \"~{prefix}_bin.info\"\n File final_kronaplot = \"~{prefix}_kronaplot.html\"\n File final_heatmap = \"~{prefix}_heatmap.pdf\"\n File final_barplot = \"~{prefix}_barplot.pdf\"\n }\n\n runtime {\n memory: \"10 GiB\"\n cpu: 4\n maxRetries: 1\n docker: container\n }\n}\n", + "root": "", + "options": "{\n\n}", + "inputs": "{\"nmdc_mags.cath_funfam_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cath_funfam.gff\",\"nmdc_mags.cog_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cog.gff\",\"nmdc_mags.contig_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_contigs.fna\",\"nmdc_mags.ec_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ec.tsv\",\"nmdc_mags.gene_phylogeny_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_gene_phylogeny.tsv\",\"nmdc_mags.gff_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_functional_annotation.gff\",\"nmdc_mags.ko_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ko.tsv\",\"nmdc_mags.lineage_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_scaffold_lineage.tsv\",\"nmdc_mags.pfam_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_pfam.gff\",\"nmdc_mags.product_names_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_product_names.tsv\",\"nmdc_mags.proj\":\"nmdc:wfmag-12-fxwdrv82.1\",\"nmdc_mags.proteins_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_proteins.faa\",\"nmdc_mags.sam_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_pairedMapped_sorted.bam\",\"nmdc_mags.smart_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_smart.gff\",\"nmdc_mags.supfam_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_supfam.gff\",\"nmdc_mags.tigrfam_file\":\"https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_tigrfam.gff\"}", + "workflowUrl": "", + "labels": "{\"release\": \"v1.3.2\", \"wdl\": \"mbin_nmdc.wdl\", \"git_repo\": \"https://github.com/microbiomedata/metaMAGs\", \"pipeline_version\": \"v1.3.2\", \"pipeline\": \"mbin_nmdc.wdl\", \"activity_id\": \"nmdc:wfmag-12-fxwdrv82.1\", \"opid\": \"nmdc:sys0v1137690\", \"submitter\": \"nmdcda\"}" + }, + "calls": { + "nmdc_mags.stage": [ + { + "executionStatus": "Done", + "stdout": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/stdout", + "backendStatus": "Done", + "compressedDockerSize": 1221549662, + "commandLine": "set -e\n\n function stage() {\n in=$1\n out=$2\n if [ $( echo $in |egrep -c \"https*:\") -gt 0 ] ; then\n wget $in -O $out\n else\n ln $in $out || cp $in $out\n fi\n }\n\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_contigs.fna contigs.fasta\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_pairedMapped_sorted.bam pairedMapped_sorted.bam\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_functional_annotation.gff functional_annotation.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_proteins.faa proteins.faa\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cog.gff cog.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ec.tsv ec.tsv\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ko.tsv ko.tsv\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_pfam.gff pfam.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_tigrfam.gff tigrfam.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cath_funfam.gff cath_funfam.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_smart.gff smart.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_supfam.gff supfam.gff\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_product_names.tsv products.tsv\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_gene_phylogeny.tsv gene_phylogeny.tsv\n stage https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_scaffold_lineage.tsv lineage.tsv\n\ndate --iso-8601=seconds > start.txt", + "shardIndex": -1, + "outputs": { + "cog": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/cog.gff", + "ec": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/ec.tsv", + "gene_phylogeny": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/gene_phylogeny.tsv", + "tigrfam": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/tigrfam.gff", + "gff": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/functional_annotation.gff", + "cath_funfam": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/cath_funfam.gff", + "smart": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/smart.gff", + "sam": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/pairedMapped_sorted.bam", + "supfam": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/supfam.gff", + "proteins": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/proteins.faa", + "pfam": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/pfam.gff", + "ko": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/ko.tsv", + "contig": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/contigs.fasta", + "start": "2024-07-01T17:24:07+00:00", + "lineage_tsv": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/lineage.tsv", + "product_names": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/products.tsv" + }, + "runtimeAttributes": { + "runtime_minutes": "120", + "priority": "0", + "disk": "0.244140625 GB", + "failOnStderr": "false", + "continueOnReturnCode": "0", + "docker": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "maxRetries": "1", + "cpu": "2", + "memory": "1 GB" + }, + "callCaching": { + "allowResultReuse": true, + "hit": false, + "result": "Cache Miss", + "hashes": { + "output count": "C74D97B01EAE257E44AA9D5BADE97BAF", + "runtime attribute": { + "docker": "F72A72C28765615372494FF01D657EF0", + "continueOnReturnCode": "CFCD208495D565EF66E7DFF9F98764DA", + "failOnStderr": "68934A3E9455FA72420237EB05902327" + }, + "output expression": { + "File supfam": "57E594B7FFCEEB39CF06D332A5562FAC", + "File gff": "1480321F9248DF00AB8D641DB3A80283", + "File smart": "618329130FDCC2FA17D21157AA4341B7", + "File contig": "B3B220254A872C9A27EEEA164CEA7180", + "File lineage_tsv": "AA53DB68E93BC4D66EA98BBFF733FA07", + "File tigrfam": "7C3B7598C68086C56536F1484BF6B4E7", + "File ko": "C0DF3CDC23F8B885E8F2D1C783D961EB", + "File gene_phylogeny": "77F6C9BA0C2A6969FEC44C172C557157", + "String start": "9180F439602EB455553A080D43DB2473", + "File cog": "57E7D9F6C6027CB5F88CF52197A0E09A", + "File sam": "297A5965C834F32050A732873F038C12", + "File proteins": "78FF964764B16900FF2D3F5A7B34FB7B", + "File product_names": "121113CFD93DA6D5AB12948E5AEC22B3", + "File pfam": "E1434D4AE774C068C3993C5DA573CCC1", + "File cath_funfam": "5367DC3284E76222A31899AF8AB88BE0", + "File ec": "9B2989B8720CFF2E291E152C83908B58" + }, + "input count": "C16A5320FA475530D9583C34FD356EF5", + "backend name": "24B80D5AA1F64928B14AC8407909E586", + "command template": "945A6BCF429CF7799750A5446DE41E8D", + "input": { + "String sam_file": "19E9B366BAA40BD1CEAD418682EF03D0", + "String smart_file": "FCD3094FC3D4A65194D8D779657644DD", + "String contig_file": "972176A3B31DB605CAC2CB281E1BC924", + "String tigrfam_out": "7C3B7598C68086C56536F1484BF6B4E7", + "String proteins_file": "91CB7DAF44756D25FB41C1120FF38528", + "String container": "D7A0D4DC020579C472C721E2466C221B", + "String gene_phylogeny_file": "27B9190B78398205DA4E738C38F57677", + "String ec_out": "9B2989B8720CFF2E291E152C83908B58", + "String cath_funfam_file": "00D75F5AECD0AC5F2102096DCF140057", + "String cog_out": "57E7D9F6C6027CB5F88CF52197A0E09A", + "String contigs_out": "B3B220254A872C9A27EEEA164CEA7180", + "String ec_file": "869EB30CAA5362F12D0CEDB87B865FC9", + "String pfam_file": "1AD2F0CA348A7494974578FD03A73263", + "String ko_file": "43B94099DB185A0E7A97650AF74218F3", + "String supfam_out": "57E594B7FFCEEB39CF06D332A5562FAC", + "String products_out": "121113CFD93DA6D5AB12948E5AEC22B3", + "String smart_out": "618329130FDCC2FA17D21157AA4341B7", + "String proteins_out": "78FF964764B16900FF2D3F5A7B34FB7B", + "String gff_file": "5BFC9D19D9A3AA6346CA57C0981D991C", + "String supfam_file": "854A0384656E64DA95C89B1688AF4B47", + "String tigrfam_file": "5E3FDBBD21DE9295237F3AF1F9B81EDE", + "String lineage_out": "AA53DB68E93BC4D66EA98BBFF733FA07", + "String ko_out": "C0DF3CDC23F8B885E8F2D1C783D961EB", + "String lineage_file": "B3823B980A369F2AFEE9097020EB450B", + "String gene_phylogeny_out": "77F6C9BA0C2A6969FEC44C172C557157", + "String cath_funfam_out": "5367DC3284E76222A31899AF8AB88BE0", + "String gff_out": "1480321F9248DF00AB8D641DB3A80283", + "String pfam_out": "E1434D4AE774C068C3993C5DA573CCC1", + "String product_names_file": "D7A68F21BCA5FE1295BB4B75EBC0F004", + "String bam_out": "297A5965C834F32050A732873F038C12", + "String cog_file": "99130451CB1A2AEA46688AA60AE0AA9E" + } + }, + "effectiveCallCachingMode": "ReadAndWriteCache" + }, + "inputs": { + "proteins_out": "proteins.faa", + "pfam_out": "pfam.gff", + "smart_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_smart.gff", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_scaffold_lineage.tsv", + "cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cog.gff", + "cog_out": "cog.gff", + "container": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_proteins.faa", + "gene_phylogeny_out": "gene_phylogeny.tsv", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_product_names.tsv", + "contigs_out": "contigs.fasta", + "tigrfam_out": "tigrfam.gff", + "cath_funfam_out": "cath_funfam.gff", + "cath_funfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cath_funfam.gff", + "supfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_supfam.gff", + "gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_gene_phylogeny.tsv", + "supfam_out": "supfam.gff", + "lineage_out": "lineage.tsv", + "ko_out": "ko.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ko.tsv", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ec.tsv", + "gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_functional_annotation.gff", + "smart_out": "smart.gff", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_pfam.gff", + "gff_out": "functional_annotation.gff", + "bam_out": "pairedMapped_sorted.bam", + "products_out": "products.tsv", + "ec_out": "ec.tsv", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_tigrfam.gff", + "sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_pairedMapped_sorted.bam", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_contigs.fna" + }, + "returnCode": 0, + "jobId": "157723", + "backend": "HtCondor", + "end": "2024-07-01T17:28:13.648Z", + "dockerImageUsed": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "stderr": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/stderr", + "callRoot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2024-07-01T17:28:12.696Z", + "description": "UpdatingJobStore", + "endTime": "2024-07-01T17:28:13.648Z" + }, + { + "startTime": "2024-07-01T16:55:06.338Z", + "description": "RunningJob", + "endTime": "2024-07-01T17:28:10.225Z" + }, + { + "startTime": "2024-07-01T16:55:06.331Z", + "description": "CallCacheReading", + "endTime": "2024-07-01T16:55:06.338Z" + }, + { + "startTime": "2024-07-01T16:54:58.165Z", + "description": "Pending", + "endTime": "2024-07-01T16:54:58.165Z" + }, + { + "startTime": "2024-07-01T16:54:58.165Z", + "description": "RequestingExecutionToken", + "endTime": "2024-07-01T16:55:05.794Z" + }, + { + "startTime": "2024-07-01T16:55:05.794Z", + "description": "PreparingJob", + "endTime": "2024-07-01T16:55:06.331Z" + }, + { + "startTime": "2024-07-01T17:28:10.225Z", + "description": "UpdatingCallCache", + "endTime": "2024-07-01T17:28:12.696Z" + }, + { + "startTime": "2024-07-01T16:55:05.794Z", + "description": "WaitingForValueStore", + "endTime": "2024-07-01T16:55:05.794Z" + } + ], + "start": "2024-07-01T16:54:58.165Z" + } + ], + "nmdc_mags.mbin_nmdc": [ + { + "executionStatus": "Done", + "stdout": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/stdout", + "backendStatus": "Done", + "compressedDockerSize": 1221549662, + "commandLine": "set -euo pipefail\nexport GTDBTK_DATA_PATH=/refdata/GTDBTK_DB/gtdbtk_release207_v2\nexport CHECKM_DATA_PATH=/refdata/checkM_DB/checkm_data_2015_01_16\nmbin.py --threads 64 --pthreads 1 --eukccdb /refdata/EUKCC2_DB/eukcc2_db_ver_1.2 --fna /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/inputs/-51889329/contigs.fasta --gff /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/inputs/-51889329/functional_annotation.gff --aln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/inputs/-51889329/pairedMapped_sorted.bam --lintsv /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/inputs/-51889329/lineage.tsv\nmbin_stats.py $PWD\nmbin_versions.py > mbin_nmdc_versions.log\ntouch MAGs_stats.tsv\n\nif [ -f gtdbtk-output/gtdbtk.bac120.summary.tsv ]; then\n echo \"bacterial summary exists.\"\nelse\n mkdir -p gtdbtk-output\n echo \"No Bacterial Results for nmdc:wfmag-12-fxwdrv82.1\" > gtdbtk-output/gtdbtk.bac120.summary.tsv\nfi\n\nif [ -f gtdbtk-output/gtdbtk.ar122.summary.tsv ]; then\n echo \"archaeal summary exists.\"\nelse\n mkdir -p gtdbtk-output\n echo \"No Archaeal Results for nmdc:wfmag-12-fxwdrv82.1\" > gtdbtk-output/gtdbtk.ar122.summary.tsv\nfi\n\nif [ -f checkm-qa.out ]; then\n echo \"checkm summary exists.\"\nelse\n mkdir -p gtdbtk-output\n echo \"No Checkm Results for nmdc:wfmag-12-fxwdrv82.1\" > checkm-qa.out\nfi\n\nif [ -f mbin.sdb ]; then\n echo \"mbin.sdb exists.\"\nelse\n mkdir -p gtdbtk-output\n echo \"Mbin Sdb Could not be created for nmdc:wfmag-12-fxwdrv82.1\" > mbin.sdb\nfi\n\nif [ -f eukcc_output/eukcc.csv.final ]; then\n echo \"eukcc.csv.final exists.\"\nelse\n mkdir -p eukcc_output\n echo \"No EUKCC2 result for nmdc:wfmag-12-fxwdrv82.1\" > eukcc_output/eukcc.csv.final\nfi", + "shardIndex": -1, + "outputs": { + "stats_json": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/MAGs_stats.json", + "eukcc_csv": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/eukcc_output/eukcc.csv.final", + "arcsum": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/gtdbtk-output/gtdbtk.ar122.summary.tsv", + "short": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.tooShort.fa", + "stats_tsv": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/MAGs_stats.tsv", + "hqmq_bin_fasta_files": [ + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.1.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.11.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.12.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.17.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.20.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.21.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.22.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.33.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.37.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.38.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.40.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.42.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.43.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.7.fa" + ], + "low": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.lowDepth.fa", + "lq_bin_fasta_files": [ + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.10.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.13.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.14.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.15.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.16.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.18.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.19.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.2.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.23.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.24.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.25.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.26.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.27.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.28.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.29.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.3.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.30.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.31.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.32.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.34.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.35.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.36.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.39.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.4.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.41.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.44.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.45.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.46.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.47.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.5.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.6.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.8.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.9.fa" + ], + "checkm": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/checkm-qa.out", + "unbinned": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.unbinned.fa", + "mbin_version": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/mbin_nmdc_versions.log", + "mbin_sdb": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/mbin.sdb", + "bacsum": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/gtdbtk-output/gtdbtk.bac120.summary.tsv" + }, + "runtimeAttributes": { + "runtime_minutes": "120", + "priority": "0", + "disk": "0.244140625 GB", + "failOnStderr": "false", + "continueOnReturnCode": "0", + "docker": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "maxRetries": "0", + "cpu": "64", + "memory": "120 GB" + }, + "callCaching": { + "allowResultReuse": true, + "hit": false, + "result": "Cache Miss", + "hashes": { + "output count": "C51CE410C124A10E0DB5E4B97FC2AF39", + "runtime attribute": { + "docker": "F72A72C28765615372494FF01D657EF0", + "continueOnReturnCode": "CFCD208495D565EF66E7DFF9F98764DA", + "failOnStderr": "68934A3E9455FA72420237EB05902327" + }, + "output expression": { + "File arcsum": "787FE996F609C8C519F3CAF8A1B6CC4C", + "File stats_tsv": "93F275502607D245963D78AA311B330F", + "Array(File) hqmq_bin_fasta_files": "9FEEFEF01742A6D55705986B498E72D6", + "File stats_json": "BB64C91BA00FBFA822525C0CA83F7551", + "File bacsum": "0716BEAF9194B529A691ABA8169DF4A7", + "File mbin_version": "30DBADB0745B9CFBE83733A77E7B9ED2", + "File unbinned": "C40A14993B2B418745AE85C7D38560A0", + "File mbin_sdb": "C1F64B80E92F88AAAFD0F32281C86583", + "File checkm": "8014C70611B1425FB83B75AC0A646927", + "File eukcc_csv": "34B03AA4F494AF7218F14090EE042A0E", + "Array(File) lq_bin_fasta_files": "9A9271C31FA80817D2D58CE66E57A99C", + "File low": "DD7E48A19D33A1E113823D26F8B1DA5D", + "File short": "7E351A09B6E8E88DCE39A4E9D6577D62" + }, + "input count": "6512BD43D9CAA6E02C990B0A82652DCA", + "backend name": "24B80D5AA1F64928B14AC8407909E586", + "command template": "4A7638A485C9681C06B748116359536B", + "input": { + "File gff": "ea2c8bf4db0775c024361a3681f5e365", + "String gtdbtk_env": "CB23C84675C6493CC5F510B167E1EC23", + "String checkm_env": "12F5EA073DE844C333782082E5FFDEB9", + "File aln": "61d7bc083c1417c3d390cdac26c24faf", + "String mbin_container": "D7A0D4DC020579C472C721E2466C221B", + "String name": "0B6C033A7D83F6E4B430E858C83BD9F7", + "Int pthreads": "C4CA4238A0B923820DCC509A6F75849B", + "File fna": "f89ee38065ee7324bdbd46c627faae33", + "String eukcc2_env": "7EF9D719D5795EC372BD096B3A0766AE", + "Int threads": "EA5D2F1C4608232E07D3AA3D998E5135", + "File lineage": "a0062f034e4b177ab3cd11d9ffc1470f" + } + }, + "effectiveCallCachingMode": "ReadAndWriteCache" + }, + "inputs": { + "map_file": null, + "name": "nmdc:wfmag-12-fxwdrv82.1", + "lineage": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/lineage.tsv", + "gff": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/functional_annotation.gff", + "fna": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/contigs.fasta", + "aln": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/pairedMapped_sorted.bam", + "mbin_container": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "eukcc2_env": "/refdata/EUKCC2_DB/eukcc2_db_ver_1.2", + "gtdbtk_env": "/refdata/GTDBTK_DB/gtdbtk_release207_v2", + "threads": 64, + "pthreads": 1, + "checkm_env": "/refdata/checkM_DB/checkm_data_2015_01_16" + }, + "returnCode": 0, + "jobId": "157724", + "backend": "HtCondor", + "end": "2024-07-01T18:26:22.636Z", + "dockerImageUsed": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "stderr": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/stderr", + "callRoot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2024-07-01T17:28:15.324Z", + "description": "RequestingExecutionToken", + "endTime": "2024-07-01T17:28:15.794Z" + }, + { + "startTime": "2024-07-01T17:28:15.794Z", + "description": "WaitingForValueStore", + "endTime": "2024-07-01T17:28:15.794Z" + }, + { + "startTime": "2024-07-01T18:26:21.670Z", + "description": "UpdatingJobStore", + "endTime": "2024-07-01T18:26:22.636Z" + }, + { + "startTime": "2024-07-01T17:28:15.801Z", + "description": "CallCacheReading", + "endTime": "2024-07-01T17:28:15.806Z" + }, + { + "startTime": "2024-07-01T17:28:15.794Z", + "description": "PreparingJob", + "endTime": "2024-07-01T17:28:15.801Z" + }, + { + "startTime": "2024-07-01T17:28:15.806Z", + "description": "RunningJob", + "endTime": "2024-07-01T18:26:19.764Z" + }, + { + "startTime": "2024-07-01T18:26:19.764Z", + "description": "UpdatingCallCache", + "endTime": "2024-07-01T18:26:21.670Z" + }, + { + "startTime": "2024-07-01T17:28:15.324Z", + "description": "Pending", + "endTime": "2024-07-01T17:28:15.324Z" + } + ], + "start": "2024-07-01T17:28:15.324Z" + } + ], + "nmdc_mags.package": [ + { + "executionStatus": "Done", + "stdout": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/stdout", + "backendStatus": "Done", + "compressedDockerSize": 493656270, + "commandLine": " set -e\n create_tarfiles.py nmdc_wfmag-12-fxwdrv82.1 \\\n /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-1737589708/MAGs_stats.json /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/functional_annotation.gff /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/proteins.faa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/cog.gff \\\n /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/ec.tsv /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/ko.tsv /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/pfam.gff /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/tigrfam.gff \\\n /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/cath_funfam.gff /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/smart.gff /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/supfam.gff \\\n /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-51889329/products.tsv \\\n /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.1.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.11.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.12.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.17.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.20.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.21.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.22.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.33.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.37.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.38.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.40.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.42.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.43.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/840927098/bins.7.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.10.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.13.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.14.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.15.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.16.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.18.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.19.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.2.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.23.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.24.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.25.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.26.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.27.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.28.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.29.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.3.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.30.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.31.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.32.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.34.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.35.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.36.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.39.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.4.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.41.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.44.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.45.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.46.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.47.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.5.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.6.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.8.fa /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/inputs/-259341658/bins.9.fa\n\nif [ -f nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf ]; then\n echo \"KO analysis plot exists.\"\nelse\n echo \"No KO analysis result for nmdc:wfmag-12-fxwdrv82.1\" > nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf\n echo \"No KO analysis result for nmdc:wfmag-12-fxwdrv82.1\" > nmdc_wfmag-12-fxwdrv82.1_barplot.pdf\n echo \"No KO analysis result for nmdc:wfmag-12-fxwdrv82.1\" > nmdc_wfmag-12-fxwdrv82.1_ko_krona.html\n echo \"No KO analysis result for nmdc:wfmag-12-fxwdrv82.1\" > nmdc_wfmag-12-fxwdrv82.1_module_completeness.tab\nfi", + "shardIndex": -1, + "outputs": { + "barplot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf", + "ko_matrix": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_module_completeness.tab", + "heatmap": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf", + "hqmq_bin_tarfiles": [], + "kronaplot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_ko_krona.html", + "lq_bin_tarfiles": [] + }, + "runtimeAttributes": { + "runtime_minutes": "120", + "priority": "0", + "disk": "0.244140625 GB", + "failOnStderr": "false", + "continueOnReturnCode": "0", + "docker": "microbiomedata/nmdc_mbin_vis:0.2.0", + "maxRetries": "0", + "cpu": "1", + "memory": "1 GB" + }, + "callCaching": { + "allowResultReuse": true, + "hit": false, + "result": "Cache Miss", + "hashes": { + "output count": "1679091C5A880FAF6FB5E6087EB1B2DC", + "runtime attribute": { + "docker": "15EF5F3946F3FB97DB9077BDFB2EC05E", + "continueOnReturnCode": "CFCD208495D565EF66E7DFF9F98764DA", + "failOnStderr": "68934A3E9455FA72420237EB05902327" + }, + "output expression": { + "File barplot": "29CD6D98A3A8C29CA8D89F6B93286D44", + "Array(File) hqmq_bin_tarfiles": "4886452572964CFDBDE8441A90C90F28", + "File ko_matrix": "763DF5AD0AE3C8678D01B116FF35A06F", + "File kronaplot": "2CB90AF81AD50DF61944B0002F5DDBC5", + "File heatmap": "63EB363AF392505E28D439A85E705E8E", + "Array(File) lq_bin_tarfiles": "0D92AD8038C4661C1A37555A1B914BC6" + }, + "input count": "44F683A84163B3523AFE57C2E008BC8C", + "backend name": "24B80D5AA1F64928B14AC8407909E586", + "command template": "7CA922014F224723BB6DFB7456A87D7B", + "input": { + "File gff_file": "ea2c8bf4db0775c024361a3681f5e365", + "File cath_funfam_file": "f6035ac8fcad366b2aaf510c47e31946", + "File cog_file": "b7cf519f94ae0dcf4af94df2e537f557", + "String container": "E4724EB95C045566807727E42591F619", + "File supfam_file": "bfed8ab1bd61358ec2fe1395b6fb5ea9", + "File pfam_file": "65f06b443535f999e1c1e8c905256011", + "File ec_file": "5c09a91c4e53978aeb9d4483b0cc11a4", + "File bins": [ + "4d9b99511546584bce7c7fab1fb3239a", + "a4eac78eb4c8b0ecf0e8756233771ace", + "35b44a186ca246254e2ed7e71863a4b7", + "07be3fec5a7a91e3a2cf645dc5dd8f35", + "e0aad9a4ab114fce04b3c8f237947177", + "7de3db5f8dfb1dd3939a57988df964c3", + "c3b17de2b3762611e07796c1d9f071db", + "7371ac1f2883778219ea5384aab65cf0", + "10ffa413343a45a3e2483ddde6714372", + "df5bc7b394e2ff05ad66b13e678af5ec", + "d86ad0e91c260e66ae0aae5215117766", + "f31f3cecbc75788f542bf8a282e809d4", + "b4a919b6d17530372401f4265f4280d5", + "fb687e086e66cfd6d1c9562227ff82eb", + "4c43133298d71002259f856720995efb", + "0bb311382c673962f0f7587eb614ea33", + "c6b739ac09ea09f5a5b243fc81ed075f", + "d8ed5ba779e0c17bfac43e46717b7df6", + "9c752f3ab53db1304bc115d7b112122f", + "9d52837869fcb54b8f35d2b07c536d16", + "6264770bfb30cd917cba490fe4496827", + "5caf431a8a98222902e18d6b619b94e3", + "ddab74b9de92c0366bd6ac3a90ec3ac5", + "b636e39db668cf7f1082b38de5565c84", + "eeef47252a10fc24c9ca367f38479a98", + "17899bef5c05f0f67c0688e9bfa7b84f", + "3cd99095e4e49337f1a0ba8e2e78b8d4", + "557f966b1240c5ad8e0e881db3b7c763", + "445e37d0f2ec02326de0a67215bc9b92", + "ee0bfc516fa184e588c21a943c2b75e0", + "8ceebf37f47bc4eb3d440cd4f97ad726", + "015dc9c1217aff8439ddf12df1c870f8", + "dd8c5036801f9c357bd8f79fce9309df", + "584f8a3b2eb9135794f45ed7d65ac3cd", + "224ec5bb64ec54b6a77eabf1d4a55512", + "ebe11fe4d6ec9d1dd79768a7b6df98b2", + "6e94dea2ac00c4ca0b0d80e6bd0e9a76", + "5fe34ad29b1856344ff826771cadd9a3", + "13ff348208368e367081f0cadc954eb9", + "d962ed10fe72bfbdf22f51d9bc46e5db", + "75d195250615dd560d442239956588fb", + "40710b79ded1cd6ea6b5b95eaff75cf4", + "72c6b89521277756677c699f911b75c9", + "6866bc52a0883b1b8449df01ce623421", + "5a2d2714f8a59e266553e53fff498490", + "eb0d7cca4fac2d7745534e671182ba16", + "d3023a46ff4c15255de05dfd50d9b18e" + ], + "File product_names_file": "c9319ff507db4478ac97150d2e693a6e", + "File proteins_file": "eddbe66a44455e361d64c9d566a92ee8", + "String __prefix": "D99564A36EECD8686D8B31D929BEB1AD", + "File ko_file": "65bc9fa8ea542c85107f2639e2bcf37d", + "File json_stats": "3c690549518ed805e22a74f41b9a5ce8", + "File tigrfam_file": "7a133d0f92c99046f6ba20348de7e819", + "String proj": "0B6C033A7D83F6E4B430E858C83BD9F7", + "File smart_file": "9f7c69d4f38aa1a8ba0db9755f467679" + } + }, + "effectiveCallCachingMode": "ReadAndWriteCache" + }, + "inputs": { + "proj": "nmdc:wfmag-12-fxwdrv82.1", + "smart_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/smart.gff", + "cog_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/cog.gff", + "prefix": null, + "container": "microbiomedata/nmdc_mbin_vis:0.2.0", + "proteins_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/proteins.faa", + "product_names_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/products.tsv", + "json_stats": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/MAGs_stats.json", + "__prefix": "nmdc_wfmag-12-fxwdrv82.1", + "cath_funfam_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/cath_funfam.gff", + "supfam_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/supfam.gff", + "ko_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/ko.tsv", + "ec_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/ec.tsv", + "gff_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/functional_annotation.gff", + "pfam_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/pfam.gff", + "bins": [ + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.1.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.11.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.12.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.17.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.20.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.21.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.22.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.33.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.37.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.38.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.40.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.42.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.43.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.7.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.10.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.13.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.14.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.15.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.16.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.18.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.19.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.2.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.23.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.24.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.25.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.26.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.27.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.28.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.29.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.3.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.30.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.31.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.32.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.34.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.35.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.36.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.39.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.4.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.41.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.44.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.45.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.46.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.47.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.5.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.6.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.8.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.9.fa" + ], + "tigrfam_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/tigrfam.gff" + }, + "returnCode": 0, + "jobId": "157725", + "backend": "HtCondor", + "end": "2024-07-01T19:53:37.640Z", + "dockerImageUsed": "microbiomedata/nmdc_mbin_vis@sha256:ec431444ad8c090932ccb4ff75d50b46b72de5d237f9069cb9bfcf47db49c911", + "stderr": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/stderr", + "callRoot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2024-07-01T18:26:25.794Z", + "description": "WaitingForValueStore", + "endTime": "2024-07-01T18:26:25.794Z" + }, + { + "startTime": "2024-07-01T18:26:26.360Z", + "description": "RunningJob", + "endTime": "2024-07-01T19:53:35.339Z" + }, + { + "startTime": "2024-07-01T18:26:23.725Z", + "description": "RequestingExecutionToken", + "endTime": "2024-07-01T18:26:25.794Z" + }, + { + "startTime": "2024-07-01T18:26:23.725Z", + "description": "Pending", + "endTime": "2024-07-01T18:26:23.725Z" + }, + { + "startTime": "2024-07-01T18:26:25.794Z", + "description": "PreparingJob", + "endTime": "2024-07-01T18:26:26.354Z" + }, + { + "startTime": "2024-07-01T18:26:26.354Z", + "description": "CallCacheReading", + "endTime": "2024-07-01T18:26:26.360Z" + }, + { + "startTime": "2024-07-01T19:53:35.339Z", + "description": "UpdatingCallCache", + "endTime": "2024-07-01T19:53:36.698Z" + }, + { + "startTime": "2024-07-01T19:53:36.698Z", + "description": "UpdatingJobStore", + "endTime": "2024-07-01T19:53:37.638Z" + } + ], + "start": "2024-07-01T18:26:23.725Z" + } + ], + "nmdc_mags.finish_mags": [ + { + "executionStatus": "Done", + "stdout": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/stdout", + "backendStatus": "Done", + "compressedDockerSize": 516978455, + "commandLine": "set -e\nend=`date --iso-8601=seconds`\n\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/bins.lowDepth.fa nmdc_wfmag-12-fxwdrv82.1_bins.lowDepth.fa\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/bins.tooShort.fa nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/bins.unbinned.fa nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/checkm-qa.out nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/mbin_nmdc_versions.log nmdc_wfmag-12-fxwdrv82.1_bin.info\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/184702129/gtdbtk.bac120.summary.tsv nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/184702129/gtdbtk.ar122.summary.tsv nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/1438666903/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf nmdc_wfmag-12-fxwdrv82.1_barplot.pdf\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/1438666903/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/1438666903/nmdc_wfmag-12-fxwdrv82.1_ko_krona.html nmdc_wfmag-12-fxwdrv82.1_kronaplot.html\nln /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/1438666903/nmdc_wfmag-12-fxwdrv82.1_module_completeness.tab nmdc_wfmag-12-fxwdrv82.1_ko_matrix.txt\n\n# cp all tarfiles, zip them under prefix, if empty touch no_mags.txt\nmkdir -p hqmq\nif [ 0 -gt 0 ] ; then\n (cd hqmq && cp .)\n (cd hqmq && cp /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/mbin.sdb .)\n (cd hqmq && zip -j ../nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip *tar.gz mbin.sdb ../*pdf ../*kronaplot.html ../*ko_matrix.txt)\nelse\n (cd hqmq && touch no_hqmq_mags.txt)\n (cd hqmq && cp /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/mbin.sdb .)\n (cd hqmq && zip ../nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip *.txt mbin.sdb)\nfi\n\nmkdir -p lq\nif [ 0 -gt 0 ] ; then\n (cd lq && cp .)\n (cd lq && cp /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/mbin.sdb .)\n (cd lq && zip -j ../nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip *tar.gz mbin.sdb /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/997245024/eukcc.csv.final ../*pdf ../*kronaplot.html ../*ko_matrix.txt)\nelse\n (cd lq && touch no_lq_mags.txt)\n (cd lq && cp /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/mbin.sdb .)\n (cd lq && zip ../nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip *.txt mbin.sdb /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/997245024/eukcc.csv.final )\nfi\n\n# Fix up attribute name\ncat /cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/inputs/-1737589708/MAGs_stats.json | \\\n sed 's/: null/: \"null\"/g' | \\\n sed 's/lowDepth_/low_depth_/' > nmdc_wfmag-12-fxwdrv82.1_mags_stats.json", + "shardIndex": -1, + "outputs": { + "final_kronaplot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_kronaplot.html", + "final_heatmap": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf", + "final_stats_json": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_mags_stats.json", + "final_barplot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf", + "final_checkm": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out", + "final_lq_bins_zip": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip", + "final_gtdbtk_bac_summary": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv", + "final_version": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_bin.info", + "final_lowDepth_fa": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_bins.lowDepth.fa", + "final_gtdbtk_ar_summary": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv", + "final_unbinned_fa": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa", + "final_short": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa", + "final_hqmq_bins_zip": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip" + }, + "runtimeAttributes": { + "runtime_minutes": "120", + "priority": "0", + "disk": "0.244140625 GB", + "failOnStderr": "false", + "continueOnReturnCode": "0", + "docker": "microbiomedata/workflowmeta:1.1.1", + "maxRetries": "1", + "cpu": "4", + "memory": "10 GB" + }, + "callCaching": { + "allowResultReuse": true, + "hit": false, + "result": "Cache Miss", + "hashes": { + "output count": "C51CE410C124A10E0DB5E4B97FC2AF39", + "runtime attribute": { + "docker": "A49801FD73AE1E831A0EF44B1F79DCB9", + "continueOnReturnCode": "CFCD208495D565EF66E7DFF9F98764DA", + "failOnStderr": "68934A3E9455FA72420237EB05902327" + }, + "output expression": { + "File final_kronaplot": "DC9995C52FEB104FA3386522DF38050F", + "File final_stats_json": "8910A2E05E66C923F85F50956FE696C2", + "File final_version": "E6A065CF46B2302C846ACA239680C5DA", + "File final_gtdbtk_bac_summary": "6EB18E0253124483A545F409F88E019A", + "File final_unbinned_fa": "88D797DB5CDD28AC43432CF04BECFC3F", + "File final_heatmap": "937B0F31CD5E38CA7F65FED52A2A205C", + "File final_barplot": "D5C03677905831471247ECFB23D9F798", + "File final_short": "A5B0BBA3B45B7EE47F8C96AA1C385078", + "File final_checkm": "85FA588428B4771129EB9FF97861EFF4", + "File final_gtdbtk_ar_summary": "191BAEBF116B81592C8E1994BD37BDC9", + "File final_lowDepth_fa": "501656F8F2BA6A150B0F61CD92850B9C", + "File final_hqmq_bins_zip": "03D6BD361C83A352D1694F570065CF05", + "File final_lq_bins_zip": "C3F4E86A1F3C8E73E66CD606A1C06BFE" + }, + "input count": "E2C420D928D4BF8CE0FF2EC19B371514", + "backend name": "24B80D5AA1F64928B14AC8407909E586", + "command template": "BC3DB3241648D4FF97777DAD3AF022BF", + "input": { + "Int __n_lq": "CFCD208495D565EF66E7DFF9F98764DA", + "File arcsum": "8796101cb0baf1a8e9f6e383285eba69", + "File stats_tsv": "ab51bb3bc9e4fb4658426ba8ce55f0b5", + "File stats_json": "3c690549518ed805e22a74f41b9a5ce8", + "File bacsum": "5d5e2d60857ab3e0669514ee19cb3e54", + "File barplot": "cf063ee66778585dfc7243056b3f68e5", + "File bin_fasta_files": [ + "4c43133298d71002259f856720995efb", + "0bb311382c673962f0f7587eb614ea33", + "c6b739ac09ea09f5a5b243fc81ed075f", + "d8ed5ba779e0c17bfac43e46717b7df6", + "9c752f3ab53db1304bc115d7b112122f", + "9d52837869fcb54b8f35d2b07c536d16", + "6264770bfb30cd917cba490fe4496827", + "5caf431a8a98222902e18d6b619b94e3", + "ddab74b9de92c0366bd6ac3a90ec3ac5", + "b636e39db668cf7f1082b38de5565c84", + "eeef47252a10fc24c9ca367f38479a98", + "17899bef5c05f0f67c0688e9bfa7b84f", + "3cd99095e4e49337f1a0ba8e2e78b8d4", + "557f966b1240c5ad8e0e881db3b7c763", + "445e37d0f2ec02326de0a67215bc9b92", + "ee0bfc516fa184e588c21a943c2b75e0", + "8ceebf37f47bc4eb3d440cd4f97ad726", + "015dc9c1217aff8439ddf12df1c870f8", + "dd8c5036801f9c357bd8f79fce9309df", + "584f8a3b2eb9135794f45ed7d65ac3cd", + "224ec5bb64ec54b6a77eabf1d4a55512", + "ebe11fe4d6ec9d1dd79768a7b6df98b2", + "6e94dea2ac00c4ca0b0d80e6bd0e9a76", + "5fe34ad29b1856344ff826771cadd9a3", + "13ff348208368e367081f0cadc954eb9", + "d962ed10fe72bfbdf22f51d9bc46e5db", + "75d195250615dd560d442239956588fb", + "40710b79ded1cd6ea6b5b95eaff75cf4", + "72c6b89521277756677c699f911b75c9", + "6866bc52a0883b1b8449df01ce623421", + "5a2d2714f8a59e266553e53fff498490", + "eb0d7cca4fac2d7745534e671182ba16", + "d3023a46ff4c15255de05dfd50d9b18e" + ], + "File mbin_version": "464f8330e7c6487d06fcb6ca95f74337", + "File sorted_bam": "61d7bc083c1417c3d390cdac26c24faf", + "String container": "2B6029E738D4565E38624C1F8EFB1683", + "File unbinned": "aef4cc7d1355c65590eab99dd9115152", + "File mbin_sdb": "363df47ccc3f41da9231a864cc25065a", + "File checkm": "855941a010c3a8884f8ab080310c1be7", + "File ko_matrix": "cf063ee66778585dfc7243056b3f68e5", + "File hqmq_bin_fasta_files": [ + "4d9b99511546584bce7c7fab1fb3239a", + "a4eac78eb4c8b0ecf0e8756233771ace", + "35b44a186ca246254e2ed7e71863a4b7", + "07be3fec5a7a91e3a2cf645dc5dd8f35", + "e0aad9a4ab114fce04b3c8f237947177", + "7de3db5f8dfb1dd3939a57988df964c3", + "c3b17de2b3762611e07796c1d9f071db", + "7371ac1f2883778219ea5384aab65cf0", + "10ffa413343a45a3e2483ddde6714372", + "df5bc7b394e2ff05ad66b13e678af5ec", + "d86ad0e91c260e66ae0aae5215117766", + "f31f3cecbc75788f542bf8a282e809d4", + "b4a919b6d17530372401f4265f4280d5", + "fb687e086e66cfd6d1c9562227ff82eb" + ], + "File eukcc_file": "d97456254b77ef7870e6a4b238afd00e", + "File kronaplot": "cf063ee66778585dfc7243056b3f68e5", + "File heatmap": "cf063ee66778585dfc7243056b3f68e5", + "String __prefix": "D99564A36EECD8686D8B31D929BEB1AD", + "String start": "51A09424D260EDD03B8F5D99411B03D6", + "File low": "d41d8cd98f00b204e9800998ecf8427e", + "File short": "6eaf12e65d6a2c80b598ae4d568bf902", + "File contigs": "f89ee38065ee7324bdbd46c627faae33", + "File anno_gff": "ea2c8bf4db0775c024361a3681f5e365", + "String proj": "0B6C033A7D83F6E4B430E858C83BD9F7", + "Int __n_hqmq": "CFCD208495D565EF66E7DFF9F98764DA" + } + }, + "effectiveCallCachingMode": "ReadAndWriteCache" + }, + "inputs": { + "stats_json": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/MAGs_stats.json", + "proj": "nmdc:wfmag-12-fxwdrv82.1", + "arcsum": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/gtdbtk-output/gtdbtk.ar122.summary.tsv", + "barplot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf", + "prefix": null, + "short": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.tooShort.fa", + "container": "microbiomedata/workflowmeta:1.1.1", + "ko_matrix": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_module_completeness.tab", + "stats_tsv": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/MAGs_stats.tsv", + "heatmap": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf", + "hqmq_bin_fasta_files": [ + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.1.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.11.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.12.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.17.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.20.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.21.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.22.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.33.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.37.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.38.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.40.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.42.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.43.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-02e2568c38f882b2a238a02d6634abe0/bins.7.fa" + ], + "bin_fasta_files": [ + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.10.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.13.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.14.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.15.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.16.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.18.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.19.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.2.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.23.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.24.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.25.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.26.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.27.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.28.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.29.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.3.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.30.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.31.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.32.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.34.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.35.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.36.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.39.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.4.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.41.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.44.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.45.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.46.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.47.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.5.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.6.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.8.fa", + "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/glob-cfa371426bf53219d565e0cd37e297b4/bins.9.fa" + ], + "sorted_bam": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/pairedMapped_sorted.bam", + "n_hqmq": null, + "low": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.lowDepth.fa", + "__prefix": "nmdc_wfmag-12-fxwdrv82.1", + "contigs": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/contigs.fasta", + "n_lq": null, + "checkm": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/checkm-qa.out", + "unbinned": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/bins.unbinned.fa", + "mbin_version": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/mbin_nmdc_versions.log", + "mbin_sdb": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/mbin.sdb", + "bacsum": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/gtdbtk-output/gtdbtk.bac120.summary.tsv", + "start": "2024-07-01T17:24:07+00:00", + "hqmq_bin_tarfiles": [], + "kronaplot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-package/execution/nmdc_wfmag-12-fxwdrv82.1_ko_krona.html", + "lq_bin_tarfiles": [], + "__n_hqmq": 0, + "eukcc_file": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-mbin_nmdc/execution/eukcc_output/eukcc.csv.final", + "anno_gff": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-stage/execution/functional_annotation.gff", + "__n_lq": 0 + }, + "returnCode": 0, + "jobId": "157726", + "backend": "HtCondor", + "end": "2024-07-01T19:55:37.633Z", + "dockerImageUsed": "microbiomedata/workflowmeta@sha256:f0ca787887e43851bbda55bef4c7b68513ffd7940e1f24f586c1d6fe220624a8", + "stderr": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags/execution/stderr", + "callRoot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1/call-finish_mags", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2024-07-01T19:53:45.804Z", + "description": "PreparingJob", + "endTime": "2024-07-01T19:53:46.644Z" + }, + { + "startTime": "2024-07-01T19:53:46.673Z", + "description": "RunningJob", + "endTime": "2024-07-01T19:55:33.863Z" + }, + { + "startTime": "2024-07-01T19:55:36.669Z", + "description": "UpdatingJobStore", + "endTime": "2024-07-01T19:55:37.633Z" + }, + { + "startTime": "2024-07-01T19:53:39.422Z", + "description": "RequestingExecutionToken", + "endTime": "2024-07-01T19:53:45.803Z" + }, + { + "startTime": "2024-07-01T19:53:39.414Z", + "description": "Pending", + "endTime": "2024-07-01T19:53:39.422Z" + }, + { + "startTime": "2024-07-01T19:53:45.803Z", + "description": "WaitingForValueStore", + "endTime": "2024-07-01T19:53:45.804Z" + }, + { + "startTime": "2024-07-01T19:53:46.644Z", + "description": "CallCacheReading", + "endTime": "2024-07-01T19:53:46.673Z" + }, + { + "startTime": "2024-07-01T19:55:33.863Z", + "description": "UpdatingCallCache", + "endTime": "2024-07-01T19:55:36.669Z" + } + ], + "start": "2024-07-01T19:53:39.408Z" + } + ] + }, + "outputs": { + "nmdc_mags.kronaplot": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_kronaplot.html", + "nmdc_mags.short": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa", + "nmdc_mags.final_unbinned_fa": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa", + "nmdc_mags.heatmap": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf", + "nmdc_mags.final_gtdbtk_ar_summary": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv", + "nmdc_mags.final_hqmq_bins_zip": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip", + "nmdc_mags.final_checkm": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out", + "nmdc_mags.mags_version": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bin.info", + "nmdc_mags.final_lq_bins_zip": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip", + "nmdc_mags.barplot": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf", + "nmdc_mags.final_gtdbtk_bac_summary": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv", + "nmdc_mags.low": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.lowDepth.fa", + "nmdc_mags.final_stats_json": "tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_mags_stats.json" + }, + "workflowRoot": "/pscratch/sd/n/nmdcda/cromwell-executions/nmdc_mags/29628c3e-8850-4210-927a-1d4258fa35d1", + "actualWorkflowLanguage": "WDL", + "id": "29628c3e-8850-4210-927a-1d4258fa35d1", + "inputs": { + "checkm_db": "/refdata/checkM_DB/checkm_data_2015_01_16", + "map_file": null, + "nmdc_mags.stage.products_out": "products.tsv", + "nmdc_mags.stage.cath_funfam_out": "cath_funfam.gff", + "eukcc2_db": "/refdata/EUKCC2_DB/eukcc2_db_ver_1.2", + "proj": "nmdc:wfmag-12-fxwdrv82.1", + "gtdbtk_db": "/refdata/GTDBTK_DB/gtdbtk_release207_v2", + "nmdc_mags.package.prefix": null, + "smart_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_smart.gff", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_scaffold_lineage.tsv", + "nmdc_mags.stage.proteins_out": "proteins.faa", + "cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cog.gff", + "nmdc_mags.finish_mags.n_hqmq": null, + "nmdc_mags.finish_mags.n_lq": null, + "container": "microbiomedata/nmdc_mbin@sha256:57930406fb5cc364bacfc904066519de6cdc2d0ceda9db0eebf2336df3ef5349", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_proteins.faa", + "nmdc_mags.stage.gff_out": "functional_annotation.gff", + "nmdc_mags.stage.ko_out": "ko.tsv", + "nmdc_mags.stage.cog_out": "cog.gff", + "scratch_dir": null, + "nmdc_mags.stage.supfam_out": "supfam.gff", + "nmdc_mags.stage.contigs_out": "contigs.fasta", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_product_names.tsv", + "nmdc_mags.stage.gene_phylogeny_out": "gene_phylogeny.tsv", + "package_container": "microbiomedata/nmdc_mbin_vis:0.2.0", + "nmdc_mags.stage.pfam_out": "pfam.gff", + "nmdc_mags.stage.bam_out": "pairedMapped_sorted.bam", + "nmdc_mags.stage.smart_out": "smart.gff", + "cath_funfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_cath_funfam.gff", + "supfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_supfam.gff", + "cpu": 32, + "nmdc_mags.stage.tigrfam_out": "tigrfam.gff", + "gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_gene_phylogeny.tsv", + "nmdc_mags.stage.ec_out": "ec.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ko.tsv", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_ec.tsv", + "gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_functional_annotation.gff", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_pfam.gff", + "threads": 64, + "nmdc_mags.finish_mags.prefix": null, + "nmdc_mags.stage.lineage_out": "lineage.tsv", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgan-12-gbysvd76.1/nmdc_wfmgan-12-gbysvd76.1_tigrfam.gff", + "pthreads": 1, + "sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_pairedMapped_sorted.bam", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-13-bje6kq59/nmdc:wfmgas-13-56028x05.1/nmdc_wfmgas-13-56028x05.1_contigs.fna" + }, + "labels": { + "wdl": "mbin_nmdc.wdl", + "git_repo": "https://github.com/microbiomedata/metaMAGs", + "pipeline_version": "v1.3.2", + "cromwell-workflow-id": "cromwell-29628c3e-8850-4210-927a-1d4258fa35d1", + "pipeline": "mbin_nmdc.wdl", + "opid": "nmdc:sys0v1137690", + "activity_id": "nmdc:wfmag-12-fxwdrv82.1", + "submitter": "nmdcda", + "release": "v1.3.2" + }, + "submission": "2024-07-01T16:54:45.797Z", + "status": "Succeeded", + "end": "2024-07-01T19:55:38.764Z", + "start": "2024-07-01T16:54:56.053Z" +} \ No newline at end of file diff --git a/tests/fixtures/mags_job_record.json b/tests/fixtures/mags_job_record.json new file mode 100644 index 00000000..7bbea6b7 --- /dev/null +++ b/tests/fixtures/mags_job_record.json @@ -0,0 +1,226 @@ +{ + "workflow" : { + "id" : "MAGs: v1.0.6" + }, + "id" : "nmdc:ab9a3d70-2bfa-11ee-940d-4eb93f6e7850", + "created_at" : "2023-07-26T21:23:31.000+0000", + "config" : { + "git_repo" : "https://github.com/microbiomedata/metaMAGs", + "release" : "v1.0.6", + "wdl" : "mbin_nmdc.wdl", + "activity_id" : "nmdc:wfmag-11-05myyz45.1", + "activity_set" : "mags_activity_set", + "was_informed_by" : "nmdc:omprc-11-vpqmce67", + "trigger_activity" : "nmdc:wfmgan-11-wdx72h27.1", + "iteration" : 1, + "input_prefix" : "nmdc_mags", + "inputs" : { + "contig_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgas-11-90bn3y70.1/nmdc_wfmgas-11-90bn3y70.1_contigs.fna", + "gff_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_functional_annotation.gff", + "cath_funfam_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_cath_funfam.gff", + "supfam_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_supfam.gff", + "cog_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_cog.gff", + "proj_name" : "nmdc:wfmag-11-05myyz45.1", + "pfam_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_pfam.gff", + "product_names_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_product_names.tsv", + "tigrfam_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_tigrfam.gff", + "ec_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_ec.tsv", + "ko_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_ko.tsv", + "lineage_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_scaffold_lineage.tsv", + "sam_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgas-11-90bn3y70.1/nmdc_wfmgas-11-90bn3y70.1_pairedMapped_sorted.bam", + "smart_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_smart.gff", + "proteins_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_proteins.faa", + "gene_phylogeny_file" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_gene_phylogeny.tsv", + "proj" : "nmdc:wfmag-11-05myyz45.1" + }, + "input_data_objects" : [ + { + "id" : "nmdc:dobj-11-9mkb6w25", + "name" : "nmdc_wfmgas-11-90bn3y70.1_contigs.fna", + "description" : "Assembly contigs for nmdc:wfmgas-11-90bn3y70.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgas-11-90bn3y70.1/nmdc_wfmgas-11-90bn3y70.1_contigs.fna", + "md5_checksum" : "64ac183a6f9c497fa6ae43cc2aa1ca6e", + "file_size_bytes" : 26375887, + "data_object_type" : "Assembly Contigs" + }, + { + "id" : "nmdc:dobj-11-gt7grc22", + "name" : "nmdc_wfmgan-11-wdx72h27.1_functional_annotation.gff", + "description" : "Functional Annotation for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_functional_annotation.gff", + "md5_checksum" : "4a0d219fa5c4cbbcf9ea09bb8adaa95c", + "file_size_bytes" : 21676898, + "data_object_type" : "Functional Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-20kgjz21", + "name" : "nmdc_wfmgan-11-wdx72h27.1_cath_funfam.gff", + "description" : "CATH FunFams for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_cath_funfam.gff", + "md5_checksum" : "b741c2924517489a3dae08faec12a275", + "file_size_bytes" : 11283509, + "data_object_type" : "CATH FunFams (Functional Families) Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-rse8j628", + "name" : "nmdc_wfmgan-11-wdx72h27.1_supfam.gff", + "description" : "SUPERFam Annotations for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_supfam.gff", + "md5_checksum" : "fd6e6169b921ccbb60a29972ae048a19", + "file_size_bytes" : 15369673, + "data_object_type" : "SUPERFam Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-vkz6mc22", + "name" : "nmdc_wfmgan-11-wdx72h27.1_cog.gff", + "description" : "COGs for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_cog.gff", + "md5_checksum" : "91f8c708ac8a00aa1459b7f2e297499d", + "file_size_bytes" : 12788976, + "data_object_type" : "Clusters of Orthologous Groups (COG) Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-ywbazd98", + "name" : "nmdc_wfmgan-11-wdx72h27.1_pfam.gff", + "description" : "Pfam Annotation for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_pfam.gff", + "md5_checksum" : "5c513e356fc7104b8b552b68e838bbad", + "file_size_bytes" : 9682652, + "data_object_type" : "Pfam Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-s0swen20", + "name" : "nmdc_wfmgan-11-wdx72h27.1_product_names.tsv", + "description" : "Product names for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_product_names.tsv", + "md5_checksum" : "2a81f01bb1eb5ed849cb00b0b45adcf7", + "file_size_bytes" : 6526155, + "data_object_type" : "Product Names" + }, + { + "id" : "nmdc:dobj-11-mxwbrg81", + "name" : "nmdc_wfmgan-11-wdx72h27.1_tigrfam.gff", + "description" : "TIGRFam for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_tigrfam.gff", + "md5_checksum" : "b709aa5a09615e9a8ac34c9e88d658e3", + "file_size_bytes" : 869578, + "data_object_type" : "TIGRFam Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-n5hs3k52", + "name" : "nmdc_wfmgan-11-wdx72h27.1_ec.tsv", + "description" : "EC Annotations for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_ec.tsv", + "md5_checksum" : "c36eb4d979461c3346308e9d9bc2252e", + "file_size_bytes" : 2104885, + "data_object_type" : "Annotation Enzyme Commission" + }, + { + "id" : "nmdc:dobj-11-bvge9w42", + "name" : "nmdc_wfmgan-11-wdx72h27.1_ko.tsv", + "description" : "KEGG Orthology for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_ko.tsv", + "md5_checksum" : "2a99a362b0b5d0559ae6d46636d82b27", + "file_size_bytes" : 3088516, + "data_object_type" : "Annotation KEGG Orthology" + }, + { + "id" : "nmdc:dobj-11-m19exh45", + "name" : "nmdc_wfmgan-11-wdx72h27.1_scaffold_lineage.tsv", + "description" : "Scaffold Lineage tsv for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_scaffold_lineage.tsv", + "md5_checksum" : "784b9fb9bc77c00107a51131362f04f9", + "file_size_bytes" : 9663889, + "data_object_type" : "Scaffold Lineage tsv" + }, + { + "id" : "nmdc:dobj-11-bg0j9849", + "name" : "nmdc_wfmgas-11-90bn3y70.1_pairedMapped_sorted.bam", + "description" : "Sorted Bam for nmdc:wfmgas-11-90bn3y70.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgas-11-90bn3y70.1/nmdc_wfmgas-11-90bn3y70.1_pairedMapped_sorted.bam", + "md5_checksum" : "630401367bd703262f52d95c4e5b22d1", + "file_size_bytes" : 1747712213, + "data_object_type" : "Assembly Coverage BAM" + }, + { + "id" : "nmdc:dobj-11-r40xwr84", + "name" : "nmdc_wfmgan-11-wdx72h27.1_smart.gff", + "description" : "SMART Annotations for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_smart.gff", + "md5_checksum" : "fa161d364f57eb6aa3af483a90d14cd5", + "file_size_bytes" : 2517672, + "data_object_type" : "SMART Annotation GFF" + }, + { + "id" : "nmdc:dobj-11-5kk68p73", + "name" : "nmdc_wfmgan-11-wdx72h27.1_proteins.faa", + "description" : "FASTA Amino Acid File for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_proteins.faa", + "md5_checksum" : "331bce1648f53e5631753f96ee79c250", + "file_size_bytes" : 15067142, + "data_object_type" : "Annotation Amino Acid FASTA" + }, + { + "id" : "nmdc:dobj-11-jfgh0180", + "name" : "nmdc_wfmgan-11-wdx72h27.1_gene_phylogeny.tsv", + "description" : "Gene Phylogeny for nmdc:wfmgan-11-wdx72h27.1", + "url" : "https://data.microbiomedata.org/data/nmdc:omprc-11-vpqmce67/nmdc:wfmgan-11-wdx72h27.1/nmdc_wfmgan-11-wdx72h27.1_gene_phylogeny.tsv", + "md5_checksum" : "16f945aa958cd0c13847d44a44736333", + "file_size_bytes" : 12195613, + "data_object_type" : "Gene Phylogeny tsv" + } + ], + "activity" : { + "name" : "Metagenome Assembled Genomes Analysis Activity for {id}", + "type" : "nmdc:MagsAnalysisActivity" + }, + "outputs" : [ + { + "output" : "final_checkm", + "data_object_type" : "CheckM Statistics", + "description" : "CheckM for {id}", + "name" : "CheckM statistics report", + "suffix" : "_checkm_qa.out", + "id" : "nmdc:dobj-11-yrzfq471" + }, + { + "output" : "final_hqmq_bins_zip", + "data_object_type" : "Metagenome Bins", + "description" : "Metagenome Bins for {id}", + "name" : "Metagenome bin tarfiles archive", + "suffix" : "_hqmq_bin.zip", + "id" : "nmdc:dobj-11-dsbday74" + }, + { + "output" : "final_gtdbtk_bac_summary", + "data_object_type" : "GTDBTK Bacterial Summary", + "description" : "Bacterial Summary for {id}", + "name" : "GTDBTK bacterial summary", + "suffix" : "_gtdbtk.bac122.summary.tsv", + "id" : "nmdc:dobj-11-104ypv57" + }, + { + "output" : "final_gtdbtk_ar_summary", + "data_object_type" : "GTDBTK Archaeal Summary", + "description" : "Archaeal Summary for {id}", + "name" : "GTDBTK archaeal summary", + "suffix" : "_gtdbtk.ar122.summary.tsv", + "id" : "nmdc:dobj-11-t1v6w944" + }, + { + "output" : "mags_version", + "data_object_type" : "Metagenome Bins Info File", + "description" : "Metagenome Bins Info File for {id}", + "name" : "Metagenome Bins Info File", + "suffix" : "_bin.info", + "id" : "nmdc:dobj-11-0c397145" + } + ] + }, + "claims" : [ + { + "op_id" : "nmdc:sys0jm1cts41", + "site_id" : "NERSC" + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/mags_workflow_record.json b/tests/fixtures/mags_workflow_record.json new file mode 100644 index 00000000..df222d83 --- /dev/null +++ b/tests/fixtures/mags_workflow_record.json @@ -0,0 +1,221 @@ +{ + "id": "nmdc:wfmag-11-g7msr323.1", + "type": "nmdc:MagsAnalysis", + "name": "Metagenome Assembled Genomes Analysis for nmdc:wfmag-11-g7msr323.1", + "git_url": "https://github.com/microbiomedata/metaMAGs", + "execution_resource": "NERSC-Perlmutter", + "was_informed_by": "nmdc:omprc-11-9cdxha98", + "has_input": [ + "nmdc:dobj-11-1x850k20", + "nmdc:dobj-11-fkj2kt47", + "nmdc:dobj-11-f9rnav80", + "nmdc:dobj-11-btqzf393", + "nmdc:dobj-11-hdty3m42", + "nmdc:dobj-11-0gk70187", + "nmdc:dobj-11-3mtmhf26", + "nmdc:dobj-11-7kfhf682", + "nmdc:dobj-11-9hjg8y84", + "nmdc:dobj-11-2x0wy902", + "nmdc:dobj-11-r0bx4g71", + "nmdc:dobj-11-7mj15p44", + "nmdc:dobj-11-r2zqpy26", + "nmdc:dobj-11-4k2bt072" + ], + "started_at_time": "2024-09-16T19:33:32.562412+00:00", + "ended_at_time": "2024-09-16T21:52:12.873101+00:00", + "version": "v1.3.10", + "has_output": [ + "nmdc:dobj-11-xvjz5h55", + "nmdc:dobj-11-85q1v678", + "nmdc:dobj-11-j5p58211", + "nmdc:dobj-11-ec2fqk35", + "nmdc:dobj-11-kg68h909" + ], + "binned_contig_num": 27214, + "mags_list": [ + { + "bin_name": "bins.40", + "number_of_contig": 44, + "completeness": 97.3, + "contamination": 3.38, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "MQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "Bacteria", + "gtdbtk_phylum": "Verrucomicrobiota", + "gtdbtk_class": "Verrucomicrobiae", + "gtdbtk_order": "Pedosphaerales", + "gtdbtk_family": "UBA11358", + "gtdbtk_genus": "UBA11358", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7_c1", + "nmdc:wfmgas-13-56028x05.1_9_c1", + "nmdc:wfmgas-13-56028x05.1_16_c1", + "nmdc:wfmgas-13-56028x05.1_20_c1", + "nmdc:wfmgas-13-56028x05.1_23_c1", + "nmdc:wfmgas-13-56028x05.1_27_c1", + "nmdc:wfmgas-13-56028x05.1_45_c1", + "nmdc:wfmgas-13-56028x05.1_55_c1", + "nmdc:wfmgas-13-56028x05.1_71_c1", + "nmdc:wfmgas-13-56028x05.1_79_c1", + "nmdc:wfmgas-13-56028x05.1_99_c1", + "nmdc:wfmgas-13-56028x05.1_52_c2", + "nmdc:wfmgas-13-56028x05.1_127_c1", + "nmdc:wfmgas-13-56028x05.1_131_c1", + "nmdc:wfmgas-13-56028x05.1_137_c1", + "nmdc:wfmgas-13-56028x05.1_169_c1", + "nmdc:wfmgas-13-56028x05.1_200_c1", + "nmdc:wfmgas-13-56028x05.1_212_c1", + "nmdc:wfmgas-13-56028x05.1_223_c1", + "nmdc:wfmgas-13-56028x05.1_372_c1", + "nmdc:wfmgas-13-56028x05.1_393_c1", + "nmdc:wfmgas-13-56028x05.1_428_c1", + "nmdc:wfmgas-13-56028x05.1_52_c1", + "nmdc:wfmgas-13-56028x05.1_582_c1", + "nmdc:wfmgas-13-56028x05.1_706_c1", + "nmdc:wfmgas-13-56028x05.1_888_c1", + "nmdc:wfmgas-13-56028x05.1_912_c1", + "nmdc:wfmgas-13-56028x05.1_1268_c1", + "nmdc:wfmgas-13-56028x05.1_1271_c1", + "nmdc:wfmgas-13-56028x05.1_1492_c1", + "nmdc:wfmgas-13-56028x05.1_1494_c1", + "nmdc:wfmgas-13-56028x05.1_1604_c1", + "nmdc:wfmgas-13-56028x05.1_1627_c1", + "nmdc:wfmgas-13-56028x05.1_1888_c1", + "nmdc:wfmgas-13-56028x05.1_1938_c1", + "nmdc:wfmgas-13-56028x05.1_2944_c1", + "nmdc:wfmgas-13-56028x05.1_3261_c1", + "nmdc:wfmgas-13-56028x05.1_3477_c1", + "nmdc:wfmgas-13-56028x05.1_4194_c1", + "nmdc:wfmgas-13-56028x05.1_6257_c1", + "nmdc:wfmgas-13-56028x05.1_7589_c1", + "nmdc:wfmgas-13-56028x05.1_10469_c1", + "nmdc:wfmgas-13-56028x05.1_10553_c1", + "nmdc:wfmgas-13-56028x05.1_13792_c1" + ] + }, + { + "bin_name": "bins.9", + "number_of_contig": 92, + "completeness": 0.0, + "contamination": 0.0, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "LQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "null", + "gtdbtk_phylum": "null", + "gtdbtk_class": "null", + "gtdbtk_order": "null", + "gtdbtk_family": "null", + "gtdbtk_genus": "null", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7094_c1", + "nmdc:wfmgas-13-56028x05.1_9486_c1", + "nmdc:wfmgas-13-56028x05.1_9853_c1", + "nmdc:wfmgas-13-56028x05.1_10857_c1", + "nmdc:wfmgas-13-56028x05.1_11702_c1", + "nmdc:wfmgas-13-56028x05.1_12042_c1", + "nmdc:wfmgas-13-56028x05.1_14174_c1", + "nmdc:wfmgas-13-56028x05.1_14597_c1", + "nmdc:wfmgas-13-56028x05.1_16115_c1", + "nmdc:wfmgas-13-56028x05.1_16261_c1", + "nmdc:wfmgas-13-56028x05.1_16795_c1", + "nmdc:wfmgas-13-56028x05.1_16943_c1", + "nmdc:wfmgas-13-56028x05.1_17208_c1", + "nmdc:wfmgas-13-56028x05.1_17245_c1", + "nmdc:wfmgas-13-56028x05.1_17383_c1", + "nmdc:wfmgas-13-56028x05.1_17783_c1", + "nmdc:wfmgas-13-56028x05.1_18468_c1", + "nmdc:wfmgas-13-56028x05.1_18553_c1", + "nmdc:wfmgas-13-56028x05.1_18858_c1", + "nmdc:wfmgas-13-56028x05.1_19302_c1", + "nmdc:wfmgas-13-56028x05.1_19824_c1", + "nmdc:wfmgas-13-56028x05.1_20316_c1", + "nmdc:wfmgas-13-56028x05.1_20787_c1", + "nmdc:wfmgas-13-56028x05.1_21029_c1", + "nmdc:wfmgas-13-56028x05.1_21435_c1", + "nmdc:wfmgas-13-56028x05.1_21475_c1", + "nmdc:wfmgas-13-56028x05.1_21484_c1", + "nmdc:wfmgas-13-56028x05.1_21518_c1", + "nmdc:wfmgas-13-56028x05.1_21685_c1", + "nmdc:wfmgas-13-56028x05.1_21809_c1", + "nmdc:wfmgas-13-56028x05.1_21924_c1", + "nmdc:wfmgas-13-56028x05.1_21958_c1", + "nmdc:wfmgas-13-56028x05.1_22186_c1", + "nmdc:wfmgas-13-56028x05.1_22271_c1", + "nmdc:wfmgas-13-56028x05.1_22516_c1", + "nmdc:wfmgas-13-56028x05.1_22514_c1", + "nmdc:wfmgas-13-56028x05.1_22777_c1", + "nmdc:wfmgas-13-56028x05.1_23003_c1", + "nmdc:wfmgas-13-56028x05.1_23115_c1", + "nmdc:wfmgas-13-56028x05.1_23204_c1", + "nmdc:wfmgas-13-56028x05.1_23239_c1", + "nmdc:wfmgas-13-56028x05.1_23352_c1", + "nmdc:wfmgas-13-56028x05.1_23445_c1", + "nmdc:wfmgas-13-56028x05.1_23505_c1", + "nmdc:wfmgas-13-56028x05.1_23571_c1", + "nmdc:wfmgas-13-56028x05.1_24047_c1", + "nmdc:wfmgas-13-56028x05.1_24749_c1", + "nmdc:wfmgas-13-56028x05.1_24981_c1", + "nmdc:wfmgas-13-56028x05.1_25059_c1", + "nmdc:wfmgas-13-56028x05.1_25526_c1", + "nmdc:wfmgas-13-56028x05.1_26162_c1", + "nmdc:wfmgas-13-56028x05.1_26376_c1", + "nmdc:wfmgas-13-56028x05.1_26773_c1", + "nmdc:wfmgas-13-56028x05.1_26816_c1", + "nmdc:wfmgas-13-56028x05.1_26891_c1", + "nmdc:wfmgas-13-56028x05.1_27179_c1", + "nmdc:wfmgas-13-56028x05.1_27272_c1", + "nmdc:wfmgas-13-56028x05.1_27358_c1", + "nmdc:wfmgas-13-56028x05.1_27411_c1", + "nmdc:wfmgas-13-56028x05.1_27550_c1", + "nmdc:wfmgas-13-56028x05.1_28892_c1", + "nmdc:wfmgas-13-56028x05.1_29003_c1", + "nmdc:wfmgas-13-56028x05.1_29238_c1", + "nmdc:wfmgas-13-56028x05.1_29324_c1", + "nmdc:wfmgas-13-56028x05.1_29771_c1", + "nmdc:wfmgas-13-56028x05.1_29878_c1", + "nmdc:wfmgas-13-56028x05.1_30248_c1", + "nmdc:wfmgas-13-56028x05.1_30476_c1", + "nmdc:wfmgas-13-56028x05.1_30587_c1", + "nmdc:wfmgas-13-56028x05.1_31160_c1", + "nmdc:wfmgas-13-56028x05.1_31834_c1", + "nmdc:wfmgas-13-56028x05.1_31922_c1", + "nmdc:wfmgas-13-56028x05.1_31971_c1", + "nmdc:wfmgas-13-56028x05.1_32244_c1", + "nmdc:wfmgas-13-56028x05.1_32605_c1", + "nmdc:wfmgas-13-56028x05.1_32623_c1", + "nmdc:wfmgas-13-56028x05.1_32832_c1", + "nmdc:wfmgas-13-56028x05.1_33068_c1", + "nmdc:wfmgas-13-56028x05.1_33334_c1", + "nmdc:wfmgas-13-56028x05.1_33438_c1", + "nmdc:wfmgas-13-56028x05.1_33855_c1", + "nmdc:wfmgas-13-56028x05.1_34035_c1", + "nmdc:wfmgas-13-56028x05.1_34120_c1", + "nmdc:wfmgas-13-56028x05.1_34140_c1", + "nmdc:wfmgas-13-56028x05.1_34133_c1", + "nmdc:wfmgas-13-56028x05.1_34177_c1", + "nmdc:wfmgas-13-56028x05.1_34481_c1", + "nmdc:wfmgas-13-56028x05.1_34728_c1", + "nmdc:wfmgas-13-56028x05.1_34843_c1", + "nmdc:wfmgas-13-56028x05.1_35665_c1", + "nmdc:wfmgas-13-56028x05.1_35772_c1", + "nmdc:wfmgas-13-56028x05.1_35995_c1" + ] + } + ], + "too_short_contig_num": 2005162, + "input_contig_num": 2273412, + "unbinned_contig_num": 241036, + "low_depth_contig_num": 0 +} \ No newline at end of file diff --git a/tests/fixtures/mags_workflow_state.json b/tests/fixtures/mags_workflow_state.json new file mode 100644 index 00000000..7cf173f0 --- /dev/null +++ b/tests/fixtures/mags_workflow_state.json @@ -0,0 +1,216 @@ +{ + "type": "MAGs: v1.3.10", + "cromwell_jobid": "9492a397-eb30-472b-9d3b-b44b676f4652", + "nmdc_jobid": "nmdc:66cf64b6-7462-11ef-8b84-deaa01ab0f49", + "conf": { + "git_repo": "https://github.com/microbiomedata/metaMAGs", + "release": "v1.3.10", + "wdl": "mbin_nmdc.wdl", + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "activity_set": "mags_activity_set", + "was_informed_by": "nmdc:omprc-11-9cdxha98", + "trigger_activity": "nmdc:wfmgan-11-jv8kx789.1", + "iteration": 1, + "input_prefix": "nmdc_mags", + "inputs": { + "proj": "nmdc:wfmag-11-g7msr323.1", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "crispr_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "map_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-1x850k20", + "name": "nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "description": "Assembly contigs (remapped) for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "md5_checksum": "6debed079383eeca2045ce23b0576607", + "file_size_bytes": 2084209623, + "data_object_type": "Assembly Contigs" + }, + { + "id": "nmdc:dobj-11-fkj2kt47", + "name": "nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-9cdxha98", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "md5_checksum": "88ec004bd037a3820060427098798666", + "file_size_bytes": 15704979428, + "data_object_type": "Assembly Coverage BAM" + }, + { + "id": "nmdc:dobj-11-f9rnav80", + "name": "nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "description": "Functional Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "md5_checksum": "349cae9b4fe62bb910f08a183e57b475", + "file_size_bytes": 1320869282, + "data_object_type": "Functional Annotation GFF" + }, + { + "id": "nmdc:dobj-11-btqzf393", + "name": "nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "description": "FASTA Amino Acid File for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "md5_checksum": "292eae73923605dae2ef9f5d582e4603", + "file_size_bytes": 1075716574, + "data_object_type": "Annotation Amino Acid FASTA" + }, + { + "id": "nmdc:dobj-11-hdty3m42", + "name": "nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "description": "COGs for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "md5_checksum": "c4d1121c1ceb1229afb7190d23553003", + "file_size_bytes": 712459544, + "data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF" + }, + { + "id": "nmdc:dobj-11-0gk70187", + "name": "nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "description": "EC Annotations for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "md5_checksum": "84cf22f39532e1bd001bea8425735a82", + "file_size_bytes": 116429630, + "data_object_type": "Annotation Enzyme Commission" + }, + { + "id": "nmdc:dobj-11-3mtmhf26", + "name": "nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "description": "KEGG Orthology for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "md5_checksum": "17d699df17c97fc28796a198cf40a328", + "file_size_bytes": 169182276, + "data_object_type": "Annotation KEGG Orthology" + }, + { + "id": "nmdc:dobj-11-7kfhf682", + "name": "nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "description": "Pfam Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "md5_checksum": "23c33758dc138e1af0f39fa1f3ca07db", + "file_size_bytes": 602929841, + "data_object_type": "Pfam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-9hjg8y84", + "name": "nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "description": "TIGRFam for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "md5_checksum": "bbfded219e0b359602725c9efb4f0c54", + "file_size_bytes": 61788991, + "data_object_type": "TIGRFam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-2x0wy902", + "name": "nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "description": "Crispr Terms for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "md5_checksum": "9d2255a63e39552328c4da20ccf2bb3f", + "file_size_bytes": 142989, + "data_object_type": "Crispr Terms" + }, + { + "id": "nmdc:dobj-11-r0bx4g71", + "name": "nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "description": "Product names for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "md5_checksum": "6f1325b2f8dee9b2a75598fb9645c43d", + "file_size_bytes": 401118634, + "data_object_type": "Product Names" + }, + { + "id": "nmdc:dobj-11-7mj15p44", + "name": "nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "description": "Gene Phylogeny for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "md5_checksum": "037aee803f1b81ac5ac1bccb9a18527d", + "file_size_bytes": 748420652, + "data_object_type": "Gene Phylogeny tsv" + }, + { + "id": "nmdc:dobj-11-r2zqpy26", + "name": "nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "description": "Scaffold Lineage tsv for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "md5_checksum": "efdce9771cdda8bd8548e44ef6d1d3a3", + "file_size_bytes": 503898615, + "data_object_type": "Scaffold Lineage tsv" + }, + { + "id": "nmdc:dobj-11-4k2bt072", + "name": "nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "description": "Contig mappings file for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "md5_checksum": "1056a6ef48ce9124de0828ee85246e65", + "file_size_bytes": 250129248, + "data_object_type": "Contig Mapping File" + } + ], + "activity": { + "name": "Metagenome Assembled Genomes Analysis Activity for {id}", + "type": "nmdc:MagsAnalysisActivity", + "binned_contig_num": "{outputs.final_stats_json.binned_contig_num}", + "input_contig_num": "{outputs.final_stats_json.input_contig_num}", + "low_depth_contig_num": "{outputs.final_stats_json.low_depth_contig_num}", + "mags_list": "{outputs.final_stats_json.mags_list}", + "too_short_contig_num": "{outputs.final_stats_json.too_short_contig_num}", + "unbinned_contig_num": "{outputs.final_stats_json.unbinned_contig_num}" + }, + "outputs": [ + { + "output": "final_checkm", + "data_object_type": "CheckM Statistics", + "description": "CheckM for {id}", + "name": "CheckM statistics report", + "id": "nmdc:dobj-11-xvjz5h55" + }, + { + "output": "final_hqmq_bins_zip", + "data_object_type": "Metagenome Bins", + "description": "Metagenome Bins for {id}", + "name": "Metagenome bin tarfiles archive", + "id": "nmdc:dobj-11-85q1v678" + }, + { + "output": "final_gtdbtk_bac_summary", + "data_object_type": "GTDBTK Bacterial Summary", + "description": "Bacterial Summary for {id}", + "name": "GTDBTK bacterial summary", + "id": "nmdc:dobj-11-j5p58211" + }, + { + "output": "final_gtdbtk_ar_summary", + "data_object_type": "GTDBTK Archaeal Summary", + "description": "Archaeal Summary for {id}", + "name": "GTDBTK archaeal summary", + "suffix": "_gtdbtk.ar122.summary.tsv", + "id": "nmdc:dobj-11-ec2fqk35" + }, + { + "output": "mags_version", + "data_object_type": "Metagenome Bins Info File", + "description": "Metagenome Bins Info File for {id}", + "name": "Metagenome Bins Info File", + "id": "nmdc:dobj-11-kg68h909" + } + ] + }, + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "last_status": "Succeeded", + "done": true, + "failed_count": 0, + "start": "2024-09-16T19:33:32.562412+00:00", + "end": "2024-09-16T21:52:12.873101+00:00", + "opid": "nmdc:sys0m369xp60" + } diff --git a/tests/fixtures/new_state_job.json b/tests/fixtures/new_state_job.json new file mode 100644 index 00000000..839dd828 --- /dev/null +++ b/tests/fixtures/new_state_job.json @@ -0,0 +1,215 @@ +{ + "type": "MAGs: v1.3.10", + "cromwell_jobid": "9492a397-eb30-472b-9d3b-abc123456789", + "nmdc_jobid": "nmdc:66cf64b6-7462-11ef-8b84-abc123456789", + "conf": { + "git_repo": "https://github.com/microbiomedata/metaMAGs", + "release": "v1.3.10", + "wdl": "mbin_nmdc.wdl", + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "activity_set": "mags_activity_set", + "was_informed_by": "nmdc:omprc-11-9cdxha98", + "trigger_activity": "nmdc:wfmgan-11-jv8kx789.1", + "iteration": 1, + "input_prefix": "nmdc_mags", + "inputs": { + "proj": "nmdc:wfmag-11-g7msr323.1", + "contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "crispr_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "map_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv" + }, + "input_data_objects": [ + { + "id": "nmdc:dobj-11-1x850k20", + "name": "nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "description": "Assembly contigs (remapped) for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contigs.fna", + "md5_checksum": "6debed079383eeca2045ce23b0576607", + "file_size_bytes": 2084209623, + "data_object_type": "Assembly Contigs" + }, + { + "id": "nmdc:dobj-11-fkj2kt47", + "name": "nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "description": "Metagenome Alignment BAM file for nmdc:omprc-11-9cdxha98", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgas-11-0qvjnc54.1/nmdc_wfmgas-11-0qvjnc54.1_pairedMapped_sorted.bam", + "md5_checksum": "88ec004bd037a3820060427098798666", + "file_size_bytes": 15704979428, + "data_object_type": "Assembly Coverage BAM" + }, + { + "id": "nmdc:dobj-11-f9rnav80", + "name": "nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "description": "Functional Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_functional_annotation.gff", + "md5_checksum": "349cae9b4fe62bb910f08a183e57b475", + "file_size_bytes": 1320869282, + "data_object_type": "Functional Annotation GFF" + }, + { + "id": "nmdc:dobj-11-btqzf393", + "name": "nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "description": "FASTA Amino Acid File for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_proteins.faa", + "md5_checksum": "292eae73923605dae2ef9f5d582e4603", + "file_size_bytes": 1075716574, + "data_object_type": "Annotation Amino Acid FASTA" + }, + { + "id": "nmdc:dobj-11-hdty3m42", + "name": "nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "description": "COGs for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_cog.gff", + "md5_checksum": "c4d1121c1ceb1229afb7190d23553003", + "file_size_bytes": 712459544, + "data_object_type": "Clusters of Orthologous Groups (COG) Annotation GFF" + }, + { + "id": "nmdc:dobj-11-0gk70187", + "name": "nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "description": "EC Annotations for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ec.tsv", + "md5_checksum": "84cf22f39532e1bd001bea8425735a82", + "file_size_bytes": 116429630, + "data_object_type": "Annotation Enzyme Commission" + }, + { + "id": "nmdc:dobj-11-3mtmhf26", + "name": "nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "description": "KEGG Orthology for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_ko.tsv", + "md5_checksum": "17d699df17c97fc28796a198cf40a328", + "file_size_bytes": 169182276, + "data_object_type": "Annotation KEGG Orthology" + }, + { + "id": "nmdc:dobj-11-7kfhf682", + "name": "nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "description": "Pfam Annotation for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_pfam.gff", + "md5_checksum": "23c33758dc138e1af0f39fa1f3ca07db", + "file_size_bytes": 602929841, + "data_object_type": "Pfam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-9hjg8y84", + "name": "nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "description": "TIGRFam for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_tigrfam.gff", + "md5_checksum": "bbfded219e0b359602725c9efb4f0c54", + "file_size_bytes": 61788991, + "data_object_type": "TIGRFam Annotation GFF" + }, + { + "id": "nmdc:dobj-11-2x0wy902", + "name": "nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "description": "Crispr Terms for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_crt.crisprs", + "md5_checksum": "9d2255a63e39552328c4da20ccf2bb3f", + "file_size_bytes": 142989, + "data_object_type": "Crispr Terms" + }, + { + "id": "nmdc:dobj-11-r0bx4g71", + "name": "nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "description": "Product names for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_product_names.tsv", + "md5_checksum": "6f1325b2f8dee9b2a75598fb9645c43d", + "file_size_bytes": 401118634, + "data_object_type": "Product Names" + }, + { + "id": "nmdc:dobj-11-7mj15p44", + "name": "nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "description": "Gene Phylogeny for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_gene_phylogeny.tsv", + "md5_checksum": "037aee803f1b81ac5ac1bccb9a18527d", + "file_size_bytes": 748420652, + "data_object_type": "Gene Phylogeny tsv" + }, + { + "id": "nmdc:dobj-11-r2zqpy26", + "name": "nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "description": "Scaffold Lineage tsv for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_scaffold_lineage.tsv", + "md5_checksum": "efdce9771cdda8bd8548e44ef6d1d3a3", + "file_size_bytes": 503898615, + "data_object_type": "Scaffold Lineage tsv" + }, + { + "id": "nmdc:dobj-11-4k2bt072", + "name": "nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "description": "Contig mappings file for nmdc:wfmgan-11-jv8kx789.1", + "url": "https://data.microbiomedata.org/data/nmdc:omprc-11-9cdxha98/nmdc:wfmgan-11-jv8kx789.1/nmdc_wfmgan-11-jv8kx789.1_contig_names_mapping.tsv", + "md5_checksum": "1056a6ef48ce9124de0828ee85246e65", + "file_size_bytes": 250129248, + "data_object_type": "Contig Mapping File" + } + ], + "activity": { + "name": "Metagenome Assembled Genomes Analysis Activity for {id}", + "type": "nmdc:MagsAnalysisActivity", + "binned_contig_num": "{outputs.final_stats_json.binned_contig_num}", + "input_contig_num": "{outputs.final_stats_json.input_contig_num}", + "low_depth_contig_num": "{outputs.final_stats_json.low_depth_contig_num}", + "mags_list": "{outputs.final_stats_json.mags_list}", + "too_short_contig_num": "{outputs.final_stats_json.too_short_contig_num}", + "unbinned_contig_num": "{outputs.final_stats_json.unbinned_contig_num}" + }, + "outputs": [ + { + "output": "final_checkm", + "data_object_type": "CheckM Statistics", + "description": "CheckM for {id}", + "name": "CheckM statistics report", + "id": "nmdc:dobj-11-xvjz5h55" + }, + { + "output": "final_hqmq_bins_zip", + "data_object_type": "Metagenome Bins", + "description": "Metagenome Bins for {id}", + "name": "Metagenome bin tarfiles archive", + "id": "nmdc:dobj-11-85q1v678" + }, + { + "output": "final_gtdbtk_bac_summary", + "data_object_type": "GTDBTK Bacterial Summary", + "description": "Bacterial Summary for {id}", + "name": "GTDBTK bacterial summary", + "id": "nmdc:dobj-11-j5p58211" + }, + { + "output": "final_gtdbtk_ar_summary", + "data_object_type": "GTDBTK Archaeal Summary", + "description": "Archaeal Summary for {id}", + "name": "GTDBTK archaeal summary", + "suffix": "_gtdbtk.ar122.summary.tsv", + "id": "nmdc:dobj-11-ec2fqk35" + }, + { + "output": "mags_version", + "data_object_type": "Metagenome Bins Info File", + "description": "Metagenome Bins Info File for {id}", + "name": "Metagenome Bins Info File", + "id": "nmdc:dobj-11-kg68h909" + } + ] + }, + "activity_id": "nmdc:wfmag-11-g7msr323.1", + "last_status": "Failed", + "done": false, + "failed_count": 1, + "start": "2024-09-16T19:33:32.562412+00:00", + "end": "2024-09-16T21:52:12.873101+00:00" + } \ No newline at end of file diff --git a/tests/fixtures/job_record.json b/tests/fixtures/unsubmitted_job_record.json similarity index 98% rename from tests/fixtures/job_record.json rename to tests/fixtures/unsubmitted_job_record.json index dc5da4d9..f0c69355 100644 --- a/tests/fixtures/job_record.json +++ b/tests/fixtures/unsubmitted_job_record.json @@ -216,10 +216,5 @@ } ] }, - "claims" : [ - { - "op_id" : "nmdc:sys0hsm3xt04", - "site_id" : "NERSC" - } - ] + "claims" : [] } \ No newline at end of file diff --git a/tests/site_configuration_test.toml b/tests/site_configuration_test.toml index ca27213c..06173330 100644 --- a/tests/site_configuration_test.toml +++ b/tests/site_configuration_test.toml @@ -18,7 +18,7 @@ api_url = "http://localhost" [state] watch_state = "State File" -agent_state = "/tmp/agent.state" +#agent_state = "/tmp/agent.state" Commenting this out will default to _state/agent.state activity_id_state = "/Path/to/activity_id_state" [workflows] diff --git a/tests/test_config.py b/tests/test_config.py index 61748f32..0e701d4c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,11 +1,11 @@ import pytest -from nmdc_automation.config.config import Config +from nmdc_automation.config.siteconfig import SiteConfig def test_config(monkeypatch, test_data_dir, base_test_dir): monkeypatch.setenv("WF_CONFIG_FILE", str(test_data_dir / "wf_config")) - conf = Config(base_test_dir / "site_configuration_test.toml") + conf = SiteConfig(base_test_dir / "site_configuration_test.toml") assert conf.cromwell_api assert conf.cromwell_url assert conf.stage_dir @@ -17,7 +17,7 @@ def test_config(monkeypatch, test_data_dir, base_test_dir): assert conf.url_root assert conf.api_url assert conf.watch_state - assert conf.agent_state + assert conf.agent_state is None # not in test config assert conf.activity_id_state assert conf.workflows_config assert conf.client_id @@ -28,4 +28,4 @@ def test_config(monkeypatch, test_data_dir, base_test_dir): def test_config_missing(monkeypatch): monkeypatch.setenv("WF_CONFIG_FILE", "/bogus") with pytest.raises(OSError): - Config("/tmp/foo") + SiteConfig("/tmp/foo") diff --git a/tests/test_models.py b/tests/test_models.py index 9fcb8ab4..c0096dc1 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -4,8 +4,6 @@ from nmdc_automation.workflow_automation.models import( DataObject, Job, - JobClaim, - JobConfig, JobOutput, JobWorkflow, WorkflowProcessNode, @@ -32,6 +30,11 @@ def test_workflow_process_factory(): wfe = workflow_process_factory(record) assert wfe.type == record_type +def test_workflow_process_factory_mags_with_mags_list(): + record = db_utils.read_json("mags_workflow_record.json") + mga = workflow_process_factory(record) + assert mga.type == "nmdc:MagsAnalysis" + def test_process_factory_with_db_record(): record = {'_id': ObjectId('66f4d5f10de8ad0b72100069'), 'id': 'nmdc:omprc-11-metag1', @@ -88,6 +91,7 @@ def test_data_object_creation_from_records(): data_obj_dict = data_obj.as_dict() assert data_obj_dict == record + def test_data_object_creation_from_db_records(test_db): db_utils.reset_db(test_db) db_utils.read_json("data_object_set.json") @@ -126,8 +130,9 @@ def test_job_output_creation(): for output in outputs: job_output = JobOutput(**output) + def test_job_creation(): - job_record = db_utils.read_json("job_record.json") + job_record = db_utils.read_json("unsubmitted_job_record.json") job = Job(**job_record) assert job.id == job_record["id"] - assert isinstance(job.workflow, JobWorkflow) \ No newline at end of file + assert isinstance(job.workflow, JobWorkflow) diff --git a/tests/test_nmdcapi.py b/tests/test_nmdcapi.py index 94d50b6c..646e4c62 100644 --- a/tests/test_nmdcapi.py +++ b/tests/test_nmdcapi.py @@ -3,8 +3,8 @@ import os -def test_basics(mock_api, requests_mock, site_config): - n = nmdcapi(site_config) +def test_basics(mock_api, requests_mock, site_config_file): + n = nmdcapi(site_config_file) # Add decode description resp = {'description': '{"a": "b"}'} @@ -14,8 +14,8 @@ def test_basics(mock_api, requests_mock, site_config): assert "metadata" in resp -def test_objects(mock_api, requests_mock, site_config, test_data_dir): - n = nmdcapi(site_config) +def test_objects(mock_api, requests_mock, site_config_file, test_data_dir): + n = nmdcapi(site_config_file) requests_mock.post("http://localhost/objects", json={}) fn = test_data_dir / "afile.sha256" @@ -37,8 +37,8 @@ def test_objects(mock_api, requests_mock, site_config, test_data_dir): assert "a" in resp -def test_list_funcs(mock_api, requests_mock, site_config, test_data_dir): - n = nmdcapi(site_config) +def test_list_funcs(mock_api, requests_mock, site_config_file, test_data_dir): + n = nmdcapi(site_config_file) mock_resp = json.load(open(test_data_dir / "mock_jobs.json")) # TODO: ccheck the full url @@ -55,8 +55,8 @@ def test_list_funcs(mock_api, requests_mock, site_config, test_data_dir): assert resp is not None -def test_update_op(mock_api, requests_mock, site_config): - n = nmdcapi(site_config) +def test_update_op(mock_api, requests_mock, site_config_file): + n = nmdcapi(site_config_file) mock_resp = {'metadata': {"b": "c"}} @@ -69,8 +69,8 @@ def test_update_op(mock_api, requests_mock, site_config): assert "b" in resp["metadata"] -def test_jobs(mock_api, requests_mock, site_config): - n = nmdcapi(site_config) +def test_jobs(mock_api, requests_mock, site_config_file): + n = nmdcapi(site_config_file) requests_mock.get("http://localhost/jobs/abc", json="jobs/") resp = n.get_job("abc") diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf new file mode 100644 index 00000000..b58b2402 Binary files /dev/null and b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_barplot.pdf differ diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bin.info b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bin.info new file mode 100644 index 00000000..ebee12d9 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bin.info @@ -0,0 +1,16 @@ +mbin.py : 1.0 +metabat2 : 2.15 +checkm-genome : 1.2.1 +gtdb-tk : 2.1.1 +hmmer : 3.3.2 +prodigal : 2.6.3 +pplacer : 1.1.alpha19 +fasttree : 2.1.11 +fastANI : 1.33 +mash : 2.3 +sqlite : 3.39.2 +samtools : 1.6 +EukCC : 2.1.2 +metaeuk : 4.a0f584d +epa-ng : 0.3.8 +Python : 3.9.12 diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.lowDepth.fa b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.lowDepth.fa new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa new file mode 100644 index 00000000..a6fddc55 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.tooShort.fa @@ -0,0 +1,100 @@ +>nmdc:wfmgas-13-56028x05.1_32164_c2 +GTAGAACATATTGCCGAAGGAATTGGCGGAATACTTGTCCTGTGGCTATGGATGGCCTTC +CTTTCCGATAGCTGGCCATTCAAATAAATGTCCCAAATAAGACAAGTCGCGGACGCTTTT +CAGCCTAACAAGGTGTTTTCGGCGAATAGCGTCCGCATTCTCATTTTTACCCAGATAGTG +ATCTTCCTGCTGATCTGGATGCTGTCGCCCACGGTGTTTCTTCCTAAGCCCGGCGAAATA +ATCACTGCGTTTTCTAACCTGTGGATGAATTACGGTCTGGGCAACGAGCTAATTACCAGC +TTCACGCTCAACCTGGAGGCCCTGGCACTATCCACGGTCGTTTCGCTGCTGCTGGCGTAT +TCGACCGTGGTGCCATTTATGCGGCCGATTGTGGCCCTGCTAGGAAAGCTGCGATTCCTT +TCACTGGCCGGACTGAGTTTCTTTTTTACCATGATGGCTTCAACCGGCCACGAACTGAAG +CTGTATTTGTTGATGTTTTCGGTCACCGTGTTTTTTGTCACCAGCATGGTCGATGTCGTG +GCCAGCGTCCCAAAAGACATCTGCGACCTAGCGCGAACGCTGCGCATGAGCGAATGGCAC +GTTGCTTGGGAATGCGTGATTCTGGGCCGGGCGGACCAAGCTTTCGACGTGCTCAGGCAG +AACGCCGCAATCGGCTGGATGATGATTAGCATGGTTGAAGGGATCTCGCGCTCGGAAGGC +GGCGTAGGCGGCTTACTACTTAACCAGCAGAAGTATTTCCGCTTGGACGCTGTGTTTGCG +ATTCAAATCTGCATTCTGTTGCTCGGCTTGGGCCAGGACTACGCCATAGGAGCGATAAAA +AAAATATGCTGCCCATACGCAAGCATGACATTAGAGCGGAAATAAAAACTTGGGATGAAA +TGGTAACGGAACTTAACAGGCACGCAGAAGCCTGCCCCACGTGTGCAGCCTGCAAAAAGA +TTGGAGATACACGTTCTTGTTGCGAGGTCGGGAAAAAGA +>nmdc:wfmgas-13-56028x05.1_269582_c1 +GATCGTGATCGCGCGTGAGCTGCTCGCAGTCGCTGCGCAACGCGCGGGCGACGGCTTCGA +GCAGGTCGGTTTCATCGATCGGTTTTTCCAGGAAATCGATCGCGCCCTGCAGCAGGGCCG +TACGCGCGATGGCCGCGTCGCCCTGTGCCGTCATCAACAGCAAGGGCAGGGAGCCGTCGG +AGCGCATCATGCGCGCCTGCAGGGCCAGACCCGACATGCCGGGCAGGCGGATGTCGACCA +GCGCGCACGCGGGTCGCTGGCACGCGGGAGCCTGCACGAAATCCTCGGCGCTGGCAAAGA +CGGTCGTGCGGTAGCCGTGCAGGCGCAGCAGCAGCGCAAGCGAGTCGCGCACCGCCTCGT +CGTCCTCGACGACGAATACATGGCCGGCATTGGGAATCTCGTTGCCCGACACTTTTTATC +CTGCGCTGCCCTGCAATGGCAGGTCGAGACAGAATGTGGTGCGTGCGCTTTCCGCGTCGA +ACCACAGGCGCCCTCCCTGGCGTTCCGCGATGGATCGCGACAGCGCGAGGCCCAGCCCCA +TGCCGCCCGGCTTGCTGGTAGCCAGTGGCGCGAACAGGCGCGCGCGCACTTCCTGCGGCA +CGCCCGGTCCACTGTCCTCCACGACGACGCGCACCGCCGGCCCCGATGCGCCACTGGCGC +GCGCCGCCTTGAGATGAATCTCGCGCCATCCGGTTTTTCCGCCCAGTGCGTCGCACGCGT +TGGCGACCAGGTTGCCGAGGATGGCGCCGGTCTGCACCGCATCCGCACACACGGGCGGCA +AGCCGGGTTCGACGCTGACGCGCCAGTGGATGTCTTCCCGCAGCAGGCGGTCGCGCAGGT +GCGCAAACGTGTTGTCGAAAAGGCCGGCGACGTCGACGCTGTCGGCGTGCATGGTGCCGG +TGCGGAAGAACTCGCGCATGCGCCGCAGGTACTGGCCGGCGCGCGCGCTTTCGAGCGAGA +TCTGACCCAGGGTTTCCAGCAGTTCGCGCTGCTCGTCCG +>nmdc:wfmgas-13-56028x05.1_269581_c1 +TTACGGCGGTCTACACCACCCGCAAAGGAAGCGACGTTATTACCGTGACCTCAACGGCCC +CGCCCAATCTCGGGCAATGGTCGGCCCCGCAGAATCTCGGCATTGTTGCGATTCATATGG +CGCTGCTCAACACGGGAAAAGTACTAACCTTCGAATATCCAACGGGCCGCAACGGAGGCC +CGACGCCGGCGCGATTGATCGATCCAGTGGCGAACACAGTAACGGATGTGACCCTGCCCT +GGCCGTACGATATTTTCTGCGCGGGCACGTCGTTCTTGTACGACGGGCGGTTATTGATTT +CGGGCGGATTGGACGACTTTCATTATCCAGCCGACTCGGGCATTGCGAATACGACTTTCT +ACAACCCGGCGACTAATACCTGGACACAAGGGCCAGCGATGAACCTGACGCGCTGGTATC +CAACGACGGTGCCGATGCCCGATGGAACGATCTTAACGGCTTCAGGCACGGCCAACGATG +GCGAGCACATTCAGTTCCAGATGGAGTCGTACAACATCAATACGAATACCTGGACGCTGC +TTCCGGCATCGGCGAACATGCCCCAGCCGAACGATACTTATCCGCTGCTGACCGAAACAC +CGCAAGGCAAACTCTTCTACTCGGCGCCGCGCATCAGCAACCTAGGCGGTGAATTGTATG +ATCCCAAAGCCAAGACGTGGTCGTTCGTCAGTAATTTGAACTGGGGACCGCGTGGACATG +CAGCCACCGTGTTATTGCCGAAATCTTCGCAGGTCATGATCGTGGGCGGGGGGGCGGCGA +AAAATGGCAATGGTGAGCCGACGGCGACGACGGAGATTATTGACTTCTCGCAACCGAACC +CGCAATGGGTTTATGGACCGTCCATGAACATAGCGCGTTACGACCACAACCTGCTTTATT +TGGCAGACGGAACATTGATTGCGGTGGGCGGAAACCAGAACTCGGAATACAGCAATCCAG +TGTTCCAGCCTGAGCTATACAATCCGGCTACTGGAGTGT +>nmdc:wfmgas-13-56028x05.1_269580_c1 +GGATTGCACATCGTCGAGCTGTACGCCGGGTATCGTGATCGAAAGAACATGCATATCCAC +GCCACAGTCGTTCATGATTTGCACTCTTCCCGGCCAATCAATGAGGCCATTTCGCACGAA +AGGCACTCGGTCCAAGAATTTCAAATAATTTTGCTCTGGCAACTCCTCTACAGGAATCGT +CAGGCTCTTGGTGTGAGCCGACCACGTCTCGGTTGCGTAAGCCTCTTCGACGGCGATGAT +CCGATTGGTTCTCGTTGCTATTTCGTTTGGCATGGTCGCTTCCTTGGTCGTTGAGGTGGG +TATCTCCTCGTCCATGCAACCTTTCGGATGATGTGCACCCCGGACGCATTCGGGCTCTTA +TGCGCTCAAGCCATGGGACAGACGACTCCCCTACGACCTGGCGATTCGGGACATGTCAGC +CAGGGGTCGCATCCAATCTCTCGGCCAGGAATGCCCCGATCTCTCCCAGCGCTTGGTCCG +CGGCCGCCAGTTTGCCTACCCCGGACACAAAGCCATGCGCCATTCCTTGCCAAACATCGA +GCCGGGCATCTACGCCCGCGTCGACGGCGCGCTCCACATAGCGCAGCGCGTCATCCAGCA +ACACTTCGTCATCGCCCACGTGCACGCGAACCGGCGGCAGACCAGCGAGGCTACCGTAGA +GTGGCGAGACTTTCGGGGCCGTCGGGTCAGCGCCGTTCAGATACGAGCTGATAAGCTCAC +GTGCTTGAGATTCGATGAAATACGGGTCAGCCGCCGCCCGTGTCTTCCACGTCTTTCCAG +TCAAGGTCAAGTCGGTTACGGGGGAAAGTGCTACCGCACCGACCGGACCCACGCGATCCG +CGTCAGTTCGGTCTGTCACGAGACGTAACAGGACGAGCGCAAGCCCACCGCCAGCGGAAT +CCCCGACCACTGCGATCCGGCCGAGATTCCGCTCCACAAGCCCGTTGTAGCACGCTTCCA +CGTCAGTTAGGGCGGCCGGAAACTCATGTTCGGGTGCGA +>nmdc:wfmgas-13-56028x05.1_269579_c1 +AACAAGTCAGCGGCAGCGGGACTTGGCGCAACCCGCGGCGGTAGCGCCGTTTCGGCTAGA +TCTGGCCGGTTTCGTTGGGATTTTTGTGGCAGGGCTCATCGCCTACTGGCCGGCGCTGCA +CGGCACGCTGCTGTGGGACGACAATGCGCACATTACTTCTCCGGCCTTGCAATCCCTCCA +TGGCCTATGGCGGATCTGGTTTGAGCTGGGAGCGACGCAGCAGTATTATCCGCTGCTGCA +CACGGCCTTCTGGCTCGAGCACCGGTTCTGGGGCGACGCCGTCGTCGGCTATCATCTCGC +CAATCTTTTTGAACATTGCCTTTCTGCCTGCCTGGTGGTTCTGATTGCCAAACGCTTGAA +GCTGCGTGGGGCGTGGCTGGCGGGATTTGTATTTGCGCTGCATCCGGTGTGCGTGGAGGC +GGTGGCGTGGATGTCGGAACAGAAGAGCACGCTTTCGGGTGTGTTTTATCTCGCCGCCGC +GCTGGCGTATTTGCGCTTCGACCGCGACCGGAAAAAATCGCAGTATTTTGCCGCGTTGGG +ATTATTTGTGCTGGCGCTCGCGAGCAAGTCGGTTACGGCCACACTGCCGGCGGCGCTGCT +GGTGGTGTTGTGGTGGGAGCGAGGCAGGTTGGAGTGGAAGCGCGACGCATTGCCGCTGGC +TCCGTGGTTTGTGATTGGTGCGACTTCCGGCTTGTTCACGGCGTGGGTGGAAAAGACGTA +CATTGGCGCGCAAGGTGCAGGCTATGAGCTAACGCTCGCGCAGCATATTTTGCTGGCCGG +GCGGATTGCGTGTTTCTACGCCGGGAAAGTTCTGTGGCCGGCGGATTTAATGTTTTCCTA +TCCGCGGTGGGATGTGGATCCGCGGATGTCGTGGCAGTGGATCTATCCAATTATCGTTAT +CGCTGTGGCGATTGGGTTGATCGCGTTAGCTCGGAAAATGCGCGGGCCACTGGCGAGTTT +TCTTTATTTTCTTGGAACATTATTCCCTGTGCTCGGTTT +>nmdc:wfmgas-13-56028x05.1_269578_c1 +CGGGAAGTAACTCGCGTGCAGGCCGCCCGTGCGTAGAACCATCATGCCGACCGCGACGCT +CAGCATGCCGGCGACCATCACCGGCATCGGCCCAAAGCGCGCCACGAGCTTGCGGGTCGT +GCCGAGCGAGAGCACCAGCACCGTCAAGGTCCAGGGCATGAAGGCCAGCCCGGTCTCAAC +CGGCGTGTACCCCAGCACCTTCTCTAGATAGAGGCTGCCGAGAAACCAGACGCCGTACAT +GCCGGTGACCAGGAAGCCGCGAGCGACGCTCGCGCCCATCAATCCGCGTAGCCGGAAGAT +CCGCAGCGGCATGATCGGATTCTCGATCCGCGCCTCCAGCGCGAGGAAGGCGCCCATCAG +CAGACCGGCCCCGATCGCGGGCAGGATCACGCTCGATGAACCCCAGCCGTCGTTGGTTGC +CTGGATGATCGCGTAGACGGCGGAGCCGATAGAGAGCGTGACCAGGATCGAGCCGAGCCA +ATCGATGCCCTGGCCGATGCCGAGTCCCTTGTCATTCGGGATCAGCGCCCGTCCGGCGAT diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa new file mode 100644 index 00000000..e02951bc --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_bins.unbinned.fa @@ -0,0 +1,500 @@ +>nmdc:wfmgas-13-56028x05.1_31_c1 +AAAAGGCCCCCGGGTACCCCCCCCCCCCCCCACTGCTTGATAAAAGCCGCCTCCGGTCGC +CCCATTCATAGGAGGTGTGCTGATACTCATGCGCTATCTGCGAAGTACACAAAACAAAAC +TACGCGAATATGACTAACTTTCTAGTCAAATTTTTAATAAAATGAAACGCCAAACAAAAA +AAATATATATTTGCTGCGCCCACTCTCTCGCAACCTTTACTTTGTCCGGCCCAGAAGAAA +AAGCAAGCTTGCCTTCAGCTGGGACGCTTAATGTCTGAGTCTCAGAAAAGACGAATAGTT +TAATTATGATAAAGGCCATGAAATCAGGGGCCTTTGAGTAGCCAAGTGGGGCACAAGGAG +GCGAAGCGCATTATTTATAATTTTCTGCGCTTCGCCTACCCCGTCGCCTGATCCTTTTCC +TTACACTTTAAATAGTTTACCACCATCTATAATGCGAAAAACTACGATTGGCTTGAGCAA +GTTTATGAAGATCATCCCTTTTTTTACGTGCAATCCCCATCTTTTGGGAAGCATCCAATA +TCTCATCAGATAAACATTGATCTAAGCTCATGCTCTTTCTGTTCATACGTCGTTTGGCTG +CTGCTTCGAGCATCCAACGAATGGCTAAGGTTTCTTGACGATTTGTCGCTATAATAGATG +GTACTAATTGAGTAGTACCAGAAATTCTCACTTTTTTCACTTCACAAACAGGCTTGACAT +TTTCTATGGCATTGACCAAAAGTCTTAATATATCGCCATGATGAGCTAGACAATGAAAAG +TTTTATAAACAATAGCACGAGATCTCGTTTTTTTACCATCAATCATGCAAATATGGACCA +ATTTTTTTATTAATTGTTTTTGTTTACTATTCAAGCCCCGGATATAACTCAAATTTTCTG +AGCAATTGTATGTGCTTGCCCCACGACCTTTAGGTCTTGATGGCATATGCCCTTCCAGGG +CATAGCATAAATATCTACGATGCGATAAGCGAAGCCCGAAAAAGCTTTCTGAAAAGAAAA +ATAGATTTTTTCCGCTAAATGACCAAGTTTTTAGCCAATTTTCATTTTTTTTCATTCTCG +CCTTTTCTGAAAGTCTTGCTTGGTGAAACCAATCAGAGGATGAACCAAACACAAAGCTGA +AATTCAATGATTTGACAAATAAATTCATATAGAATCTTTGGGTTTTTTTGTACCATATTT +AGATCTGCCTCGACGTCGACTCGGTATTCCCTGCAAATCTTTAACTCCTCGAATACAATG +GTATTTCACACCTGGCAAATCTTTCGCTCTACCTCCTCTAACCATAACAACAGAATGCTC +TTGCAAATTATGGCCTTCGCCGGGAATGTAAGCAATTATCTCATTTCGATTGGTTAAACG +TACTTTGGCTATCTTGCGTAAAGCTGAATTAGGTTTCTTCGGTGTTCTCGTCGAAACACG +CAGGCATACTCCTTGCTTTTGGGGACATTGAGTTAGAGCTCGAGTACGTTGTGTGCGCCG +TTTTGACTTTCTACCATGACGAATCAATTGATTTATTGTAGGCATATTTCACTTACTTGG +TTTTTTTTAGAAAGTTGCATAAAATTTTTCTTTCCCAATTCTCACGCTTTATTCGTTACG +GGAATGGAGCCTTTGGCTACTACATCCTGCGGCCTTTCATGGCTATGCTTCCCACGATTC +TTATTAACTATAAGAGCACAAAAAAAAATAACAAGAGAGGACGGTTAAGAACCAAAATAA +AGGGCGAAGAGACTGAAAAAAAAGTATTTTTTCAGTTTCTTTTATCCTTTTTTAGACAAA +AAATTCCTGCGTGCACTAGAAAGCTCTTTTTGCTTGTCTTTCAGAGCAAGCATAGTAATT +GTAAGGAAGGCTATCCCTTACGGGATTCGAACCCGTGTTCTCGCCATGAAAAGACGATGT +CCTATACCCCTAGACGAAAGGGACAAAATTACTTCGAATAAAAATGTTCGGCCAGAGCTG +CGCTGCGATAAAAAGAACTGCGGCCTGTGGCTCGTTTATGCGCCACTTTTCTCTTTCATC +TACGATAATTCATGACGCTTTCAACTAAAAACAAAAACTCTTCACCTAATTAACATTACA +CCTACACCGAGAGCAACCTTCAATAACGTCTGCATTTTCCCTTTTTTAATAAGGCCAAAA +TTTTGGGAGAAATGAGCAAAAAAAATATATATATTTTTTTTCAATTCTCAAAATATTTTT +TCATATATATATGTTTTTTCTTTGTAGTAATCTAGCTTACATTACACGTAGTGCTTAAGA +ATAATCAACTTGTGCTTGTAGCTCAATCGGATAGAGCACCAAACTACGGATTTGGGGGTT +GAGAGTTCGAATCTTTCCAAGCATGGGCCTATCCATCAAATTATACTAAGAGAATACATC +TAATAAGTTTAAAGTAAGTAGTTCCCGTATTTTTTTCTATCGTCTTGTCTTGTAAGAGCT +AGCGGAAATAGCTTAATGGTAGAGTATAGCCTTGCCAAGGCTGAGGTTGAGGGTTCAAGT +CCCTTTTTCCGCTTGAGTTTTTCTTTACCGAGTTTTCCTTTGTAATTCCACAGAGACAAA +GTTATTGCGGTCGCCAGCGGAGCGAGCCGAAGGCACCAGGAGAGAGTGACCAATAAAACC +GGGGGTTAAGTTGCTTGGCCTTGAGGCCGCAGCTTCAAATATTTGAAATAAAAATCTAAA +AAAAATATTTTTTTAATCATTTGTCACGATTTATTTTTTTAATCATTTGTCACGATTCAG +GGCGCAGGTGCAGCCATCTTTTTTCGCGCTGCGGCGGCCATTTCCCTGTGAAACGCGGTG +AAACCGGTGCTGCGCCCACATTATTCGCATAACAAATGATGGGGCTGGAAACCGGGAGGA +TGGAGTAATGAAACGGTACGGAGAGTCATTGGAGGCACAGTGCTTTCCTTGTTTTTAGCA +AAGGCTAAATAAAATATCTGCGAAAAAAATTCTTTTTTTTATTGCGCCCCGCGAAAAGCT +ACGCTCTGTGGCCTTGCTCGAATTCAGCCTTAGATTCAATTCAGACTTGCGGATTATGCA +ATTATTATGGTGAAATTATTATAGTAAACTTATAAAAAAAAATTTTTATTTATTTTTCCA +CATACAACTTACAAACATAGACTTAGTGCCATTTGATGGGTGACTAAGAATAGAGGAGAG +GGGTATAGAAAGAAAAAACTGATCAGAAATAAAGTAATTGCTAGTAGTAAGGATTTTTCA +CGATCCATTGGTTTATATAGAATCCATTTTTTAGGTGTATCAAAATACATTATTTTCACA +AAGCGTATATAATAAAAACAACTAATAACACTAGTAACAACTCCTATTAAGGCCAGTAAG +TAAGCCCCACAGCCTAAAGCAGCAAAAAACAAATAAAATTTGCTACAAAATCCGGCTAAC +GGGGGTATTCCTGCGTATGAAAACATAGTAATAGACAAGGTAATAGCTAAAATAGGATTA +GTTTTGGCTAGAGCACCTAAATCTGCTATATATTTGAAATGGTTTTGACGTAATGCTAAA +ACTATGGCGAATACATTGATGGTCATTAATACATAGATAAAAACACCAATTAGTAGCGAT +TGAATTCCTTCTATGGTTCCACATGAAAAACCAATAAAAAGATAACCTACATGTCCGATA +GAACTATAAGCTAAAAGTCTTTTGACTTTGTTTTGAGCCATGGCGGCCAATGCTCCTAAA +ATCATAGAAGCAATGCTGCAAAAAAAGAATAGTTGTTGCCATGTTGGATCATAAAAACTA +TAAATAAAAACACGTACCATATTGGCAAGGATAGATATTTTAGGTGCAATAGAAAAGAAT +GCTGTAACCAAAGTAGGTGAACCCTCATATACATCAGGTGCCCACATATGAAAAGGAACT +GCTGTGATCTTAAATAAAAAACCTACAGCAATAAATAAAATCCCCATAAAAATACCACTA +GATTGAGCACCGAATAAAGTGATTTCATATCCAGTGAAAATCTTAGCTAATTCCTCAAAG +TTGGTAACTCCAGTAAATCCATAAATCATAGAACAACCAAACAATAAAATTCCGGAGGAG +AATGCACCTAAAATAAAATATTTTAAGCCGGCTTCTGTGGAGAATTCAGAGTCTCTTTTT +GATGCTGCGATCACATAAAAACATAAACTTTGAAGCTCAATAGCTAAATACATGGCAATT +AAATCATAAGCCGATATCATGAATAGCATACTGCAAGTAGAAAGTAGAATTAAGACAATA +GATTCAAAAGCATTCAAACTCTCTTCTTTGAAATAACCCAAACACATAACTATAGTGCTA +GCCGTACTTACTAATAGAAAGATTTGGCAAAAATATGTAAAATTGTCTATTATTAAATTA +TTATAGAATAAATTGGCAACAGTTAAAGGTGCGCTAGAGGCCACCAATAAGATGGTTATT +AGATACCGATAGGATTCTTACCCCCCGGTCAGAACCACACGTGCAAGTTTCCTTGCATGT +GGCTCGTCCATGATAACTTCTTCAGGTCTACGGATCAATACCCCCTAAGGCTGGGCCTGC +ATAAGTGAAATAGAACACTGATCATGTTCGGAGTTGGCGTTATGCCATATTGTCTTCCAT +TCCTTTATTAGTTACGTCCATAGGTAGATTAATTAAATTTGTTGCTTTGCCCAGCTGTTG +ACCTAGTCTTTTATTCAGGGTCGGTATATTAACGTAGGAAGTCTCATTAATATGAGGCTG +AAAGTGGTACTCAGCGAAAAAAATATTTTTTTTTCCCAGTCAGCCAGCCTTTAATGGCAT +TATCCTAAATTGCTTCTCTGATTTTGCTATACTTTCCAGACGCGGCACGCATCCGCATGT +TTCCAGTACAGCGCATTATCACCTACCTCCCTTTCTTCCAAGCCTTTCACTCTAAAGGGC +ATCGTCGTATGCTCGACATTAATTGCCCGGTGTATTTCTTGGGGTACATTATGGAAGGAT +CTGTCACCCGTACATTTTTGGCTAAAGCCTGGGTCTCCAAGGGATTTTCAAAAGTATTTT +TGAAAATCGTAGCAAAATATTTTTTTTTTGCTACGCCCTGCTTTTCCCGGTCCCTATAGA +GTCCATTTGGGTGATCCCTAGTTTGCGCGTTTGGCCGTTTGCTCGTCGTTTAGTTACGTG +TACCACTCTTCCTGTTTATTACAGTTACATCCGGAGGGTTGCTCGCACCTGAGTAGCGTT +CGAACCCACGTGCCTTCCGGCCCCGCCTTTCGTTGGGGGAAGTTACCTTACTCCTATGCG +TACGATTGTTCTTGCGACGAAACGCGGATAGTACCCATGCTATGGTCCCCTGCGGTCTGA +TCCCACATAAGGTTAGGGAGTCTACCCGAGCGATTGTTTTCAACCTTCGTCTTCCCAAAC +GCACCCTCCTAGGCGCACAACACTAAGTAAACCAAGCCAACTCACATTACACACTAATGG +TGGATAATCATATTTTTTAGAGGTACTAAATACAACTCCATAAATAAGCAAAATGATGGT +TGCGTTAATAAGAAAGATCTCTGGGAAAAGCGCTAAAAAATCATGTTCAAACATTTCTTC +AAACCTCTTTTTTATGTTTTTTAATCAAATTTTCCATGTTGCACTAAGTTACTTACGGAA +GTATGCATACACTCTAGGAACACTTCAGGGTAAACACCCATCCAAATAACTCCAACAATA +AAAGGTAAAAATATTAGAACTTCTCTTCTATTCAAATCGGAGAATTTTTGGAGGAATTTG +GGTTTGAAATTCCCAAAAATCACACGATTATATAGCCAAAGAGAATAAGCTGCGCCTAAA +ATCATCCCAAGTGCCGCTAATGTGGCCACTAAGCTATTTCTTTGGAAAGCTCCTACTAAA +ATAAGAAATTCCCCAATAAAGCTGCTAGTACCGGGTAAACTCATATTGGCTAAGGTAAAG +AATAAGAAAATGGTAGAGAACATTGGCATGGTGTTGACCAAACCTCCATAATATTTAACA +AGTCGAGTCTTATGTCGATCATATAAAACACCAACACACAAGAAAAGGGCTGAAGAAACC +AGTCCATGACTTAACATAAGTAAAATACTACCTTCAATTCCCTGTATGTTTAGACTGAAC +ATACCAATAGTCACAAAATTCATATGAGCTACCGAAGAGTAAGCAATAATTTTCTTCAGA +TCGATTTGTCTTATTGTAGTCAAGGAAGTATATATAATAGCAATCACGCTTAAAGTATAA +ATGAAAGGAGTGAAATAAAGTGTCGCTTCAGGAAACATAGGTATAGAAAATCTTAAAAAA +CCATAGGTTCCTAATTTTAAAAGAATCCCTGCCAATATTACAGATCCAGCCGTAGGTGCC +TCTACGTGAGCTTCAGGTAACCAAATATGAACTGGTACCATAGGCACTTTTACGGAGAAA +GAAGCAAAAAAAGCAATCCATAGTAATATTTGGCGCCGCTCACTAAATTCTGTGGTTAAT +AATATTTGTAAATCAGTGGTTCCTGTTTGGAAGAAAATAAATAAAATGGCTAAGAGCATG +AAGACAGATCCAAGTAAAGTATATAAGAAAAACTGATATGCTGCTTGTATTTTTCTTTGT +CTAGAACCCCATACCCCTATGATAATAGGAGAGTAAGGGATAGGCCCTCTGCTTTGTCTT +CCCAGGATAAGTGGGTCGAGAAAAGGCTCCTGTAGGTACCTACTCCCTTCTCAGAGAACC +GTACGTGATACTCTCGCATCATACGGCTCCGTCTCGAAATAGCTGATGAGTCAGAAAGAA +GGACCAAGCAAAAAAAAAATATATATATTTTTTGCAGAAAGGGCTTTACGCCTTTTTTTG +GCTTGTAGTTTTTTGGGCAAAAAAAAATATGTTTTTATTTGGTCTCCTCGTTTGTTCCAG +CAACTCATTCTGCGGCCTTGCCAATCGCTTCCCGACGAAGGAATTGACCTGAGGCCTTCT +TCCAAGACCAGGACGATTATCCTTGTCAGCTCAATAGGTAGCTGACGAGTTTACGTCCAT +CCCTGGGCGTCGGGTACTGTTCCCTTCCCCGCCACCCTTGTAGCCGTCACCCGTGATTCG +CGCAAGTGTGTCTGCATCCCCTAGACTTGACGAAAACTGTTTTCTCGCCGCAAAACTCCA +TTCTCCCCTGCCTAGGAGCGCCCTTTCTTGCATGTTTGTACAATACTTGAGTAGGCGGAG +TGAAGTCCTGCAAGGTACTCACTCAGAGTACCCCCCCAATCCCAAATAGGTCATCCGACG +GGTTCAACCAAAAAGCTATGCTTACACACAAGACTACCCTTCTCCGAAAGCTTCGCGGGG +ACTACTAGACTTAGATCCGCCCGAAGGGGTTTACTGCATAAGGCTCCTGCAGAGGCGGCG +CTTCGCGCCGCGAATATCAGATGCTCAGCTCCGCACAACATAGGGATTAAAACGCTTTCA +AAAAAGACATAAAATAGTAAAAGATCCAGCATGCAAAACACAGCAATCATGAAAGATTCA +CAAATTAAAAACGCTATCATATACTCTTTTTTATAACTTTTAATACTGGACCAACCTACT +AGAATGCAAATAGGAATTAAAAATGTGGTCAGGACCACAAAAAATAAAGAGATACCATCT +ATACCTATATAAAAATTGATGTTTGAATAAGGAAGCCATCGAATAGTTTCCACGAATTGA +AATTTAGCTGTGGAATTATCGAATTGTATCCAAAAAAGAAGGGAATACAAAAAAGTAATC +AGAGAAGTGCACAAACCAATACTTCGTATCAGTCGTATTCTGAAATCAGGGATAACAAAA +AGAATAATGCTTCCTAACAAAGGACACAGAATAAGACCACTGAGATTGGAATAAAATGGA +GCTAAAAATTGTAACATAAATGTTTACTCTTTTTCCAAAAGATCCCGAGGACGCAGGTCC +ATATTTCTTCCTGGCTTTCCAGCCATAGCGGCCCTATGGGTATATCATCATCACCCCTGC +ACTTATCGTACATAAAGTCCGCAAAAATTGCATAACTTGATGGGCTTTTGTACGCACATA +CTATGTAATTGCATGTTTCGCCTTTCGCTGCTTTGCCCAACGCTTTACTTTAGGGCTTGC +TACTATGACCGGACGGCCCCAGTCAGGGTCGGAGGGATAAGAAAAAGCCGATTGGGCAAA +AAATTTATTTTTTTTGCTTTTTAGGCTTCGGGCCTCGGCCCTCCTTCTCAGCAAAGCCGA +AAAAAGGGCGTAGCACTGGCTTATCATTGCATCCCTTAAAGTTTCATGGTATTATATGAG +GAAGTATGGCTTTTTAGTTCGTGCTAATGTCTTTTTCAAAATGAATAAATAGAAAACTCA +CTATATAAATAAAATACAATCGATTATCTACCCAAAAAGAAATAAAATCCCACAGACCTA +TTATGGTAATAAATATGGTTAAGCCAATTAACATCACAAAAGCATAATGATAAACAAAAC +CACTTTGAAGTTTACTTATCTGCTTGGCTAATTTTCGAAATGTGTACGAAATCCCATAAG +GCCCCAAGATTTCAATAGCACCCTTGTCTAAAACCTTAAATGAGACTTCATATCCGAAAC +GCAAGAAGAATCTAACTATAAAGTCATTAAAAATTTTATCAAAAAACCAGCGCTTATTTA +AAAAGCAATATAATCGATTACCCAAAGTACTAGTTTTCGAAGCGAAAATGAATTGATTTG +CTACAAAATTTATATTATACGCTATAAAAGCACCTAAAGTACTAAATAAAATAGGAATTA +ATTTGATAATTGTTGGAGTAGCAAACTCAGATTCGGCAAGAATTTCATTTTTTGGTAGTA +TAAAAAGGGAATTAGCCCAAAAATTGGTACCTAAACCAATCATCATATCGGTCCCTAAGC +CGTTCCATTGCTGGAACGGCTTGGCTTCAAAACCGTACGTGAGGCTTCCGCCTCATACGG +CTCCTCTCAGGATTTTTCCGTCTTCCCGCTTGTTTTCAACATGGCAGTGCTTGTCCAGAA +GTTAAAGGTTGTGTTCTACGAACACGCAAGGATTTCACTTTTATGTGGTCCACTTTTATT +TCCTCGTGGTTTTCTAACATCTTTGGACAATTTATTTAAGAGCCCTTTCAGGAGTTTGGC +TACTGCATTTGTAACCTCAGATTTCAGTAGATAGTATTTCCGTTGAAGGTGTGCAGCGGA +ATAGAGAAGTCAGGACAAAGAAAGAAATATAGATTTTTTTGCTGTTGCGCCCCCTCTTCA +CGCGGCTCGTAGTTTTATTGGGCTCTTATTTTGTTTTGTCTTAATGTGCTTGTGGCTAGC +TATCCAACTCAGTTTGACCAGGTAATATTCTTTCTTCACCCCAAAGGCCGTTGTGTAAGA +GTCGTACAAAATCCAGTTATCTTGGTATACTTTCCCTTGGAAGAGCCAGCTTCTCTTTTC +GGGGAAGTACTTTTGTTTGATTTTATCACGACCCCATGTCGGGTGCCGTCGGTATACCCA +TGCTCGGATCTTTTGAAAGATGTCATGGTCTAATCTCCGAAATATATTACTGCATTCAGA +GTATTTGAAGTAGTTGCCCCATCCCACTATTTTGGGTACCAGGGTTATAATAAGTTGGTA +GGCTGCTTTTCCTTTTGAAAATTGAATGGCCCGTCGAATATCTTCGAGAAATCTCTGGCG +AGATTCACGAGACGGAGTTATTAAAGTTCTCACTTTGCCCCATTTCCAGATATGATTAAT +TGAAAACCCTATAAATTGGAACCCGGACCCGGTGTTGACTATCTGGATCTTATCTGAGTG +CAATTCTAGTCCCATGCACCTTAACCATTCCCTCAGGAATGTCTGAGCTTGTTCTATGAC +CTCCCTGGATTGGTGAAGTATAACGAAGTCGTTGGCATATCTAACCAGTATCGCAGTTGG +TTCTTTGGGATATGCTTTCAGTGACCACTCTGTTAAAGCTGTCTCCATCCCATGGAGAGC +AATGTTGGCTAGCAGTGGGGAGATGACGCCACAAGTGCCCATATTCCTGGTTTCCGCGTA +ATTTCCTAATCCGGTCATGAGGTCGACTTTAAGCCAGGCTTTGACCTGTTTGGCTAAACT +AGGCATAGTTTTTAATTTATTTAAGAGGGCTTCGTGGTTGATGCGATCGAAACATTTGGA +AATGTCCGCGTTCAGAACATACTTGGGCTTGGCTCGAATAGCTAAGAATATAGCTTTGAT +GGCATCCTGGCAGCCTCGTCCGGGTCTGAATCCATAGGAATTGGGTTCGAAGCGGGCTTC +CCATTCAGTTTCTAGGGCCATCTTGGCCAAAGCCTGCTTGGCTCTGTCTTCGATAACAGG +GATAGGCCAGGAAGATAAAAAGGCGCGAAGTTTAGTTCGAAAAAAAATATATAGATTTTT +TTTTGCTTTACATTTTCCGGGCGCAAAACGTGCTCGATTGACTCTACGTTTTCTTGCGCG +CAGCGCAGAAAAGGCGCAACAAAATCTATATATTTTTTCTGCTTGTAGTTCTCTGGGGCG +CTCTTTTTCCTTCCAGGGCTTTGATATTCTTATTCGACGAATGGGCGTAGCCTTCCCATC +CAATTGAAGACCTCGTGCCATCTCTATTTTTTGTGACCCTGAGGTTACCATCCTCTTGTA +AATGTCAGGGTCCTTTTTCCCTGGGTTCTCCTGTGTAACTTGTCTCACGGCCAAAAGTCT +GGCGTGTAGATTTCGTATAAGTCTAGATTGTAGAGCACGCATCTTGTCCATATCGTTGAA +GCGAGAAGCTTGGTAAATCCTGGTTTGGAGTCTAAAAACAACTGCCTCGACCTTGGGCCA +AGGAATTTGTTCCCAGGGTATCGTTTGGGTATCTATGTAGAACCTACTCATAACTCATTC +TCCTTTCCAGTTTTTACTGAAATCCTAAATCTCCAAGTAAGCATAATAAAAATGCTGCGA +AAATAAATATATTTTGGCTGGCTGCGCCCTGCGGCCTTGGCTTTTTAGAGAATCCTGCTC +TCGTAGGGCTTGTAGCTTGGCAGCCCGCCACAAGGGAGCCCTACCAGAGTCTTCCAGTTT +CGAGTTTATTGGATAGTTATAGCGGCCCATAGGCGCAAGATGTACTTTGCGGGGTGGTTC +CTTGGACATAGTCCTTTCAGACAGTGGCCGTTTAGTCCATGGTCCATTGGATGTTCGGTG +CAAGACCAAAAATTTGCACTGCAAGTACCCCCTAATTATTCCCCCTCACTATAGGGCCCG +TCCCTCAGTGTGGTCAGTACTTGATACTGTCAGGCAGCAAAGCTTCCACTTGTTTACTTA +TGATGGTTCACCAAGGCCTCTTTCCTCCTCCCTTTTTTGCTCACTTATGACTCAGAAGCG +GCCAGACCTCCTACAAAGGAAGAAGAGTCGACTGAACATCTCAGCCATTGGCGGGAATTT +CGCCCGCATCCAATTCTCAATTATCGTTCACCCAACACGAAAAAAATATATTTTTTTCGT +GTTGGGTGAGCAACAGCCACTTCGTCACAGGAGTGACTTATGGGCTAACAGGTCACACTT +TGGCCACGTATCCTACAAAAATACTTCCAAAAGCTAAAAAAATTAAAGGAATTGCCATAA +GAATGGGCGCATCATGACATCGTAAGATGTCTCGTTTGAATGAATTTGTTGGTGCTAAAA +AAGTTAGAAAAAGTAAACGAAAAGAATAATAAGAAGTAAAGAAGACAGAGACACTTCCCA +ACCAGAAAGCAAAGTTACCACTAATCGTATATTTAGTATAAGCGAGCTCTAAAATAACAT +CTTTAGAGTAAAATCCAGTACAAAAAGGGAAACCTATTAAAGATAAGCTTCCTATAAGCA +TCATAGCATAAGTAAAAGGTAACAAGGAAGCAAGCCCTCCCATCTTACGCATATCTTGCT +CATCCGACATGGCATGAATCACTGAACCTGCACTCAAGAAAGGTAATGCCTTGAAAAAAG +CGTGATTCATTAAATGGAATACGCTAACGGAATAGTTTGAAATGCCGCAAGCAAAAATCA +TATAGCCTAATTGACTACAAGTTGAATAGGCTATGACCCTCTTTAAATCGTTTTGTAATA +TTCCAGTGGTTGCTGCGAAGAATGACGTCATAGCTCCTATAAAAGTAATAACAATCAAAG +CAATGGGTGAATATTCGAATAAAGGAGAGCACCTTGCTATCATAAAAACGCCTGCTGTTA +CCATAGTAGCTGCGTGAATCAAAGCAGATACTGGAGTGGGCTACTCTTTAATAACCTCTT +ACGCTTTCATACGGCAAAAAAATAATATTTTAGAGCATTGGGTAATAAGCTGATGAGCCT +AGCAAGAGTAGGGACTGGATCTTCCACTTATGAAATAGAGACTCTCCCAAGAATCTTACG +GATGGCCGCTGCGCCCTTAACAACTAAGATGTTTTTTCTCTTTTATACATGAGACGCCAT +CTCAGCCCCATTCTAACGCTTTTCGAAAATATATATATATATTTTGCAAAGCAAAAAAAA +ATATTTGGGCTTTTTTGAACTGAATCCTGGTAGATCCAGCGCGCAGCGCTTTGGTAACGA +TGACGATAAAGCTGAGCTCAGGCCGAAAGTTACGATGTAACACCAGGTTGCGCTGGAAAA +AAACAATAGATATACTCTTTTATTCCGCTTCTTTCAAATAAGGCTTATAACCAAAATGAT +AAGTATTTGATTTGATACAAGGTTTCTTTACATGCCCAAACCCTATACGGATCCTTCTAT +TCCATAAAATCTGCACATCTAGATTATAGAATCTAGAAAAAAATGATTAGACCTTAACGA +CAAAATAGTGCTTCGTACCTTAGGTAAATCTGGCCAGCTTATCTTTCCAGGGATAAATTC +CATTTTGAACAAGAGGTCTCACGTACAGTCTCTGAGGATCCTATAGAGCTCCTTCAGCCT +TTACTTCGCTGGGTGGATTTGGCCTTCCTTCATAGGTTTCCTGCTGATTGGTCAAAATGA +GAGATTTTTCCAATAAGGACTCTCATTCGGTCGACGATTCCAGCATACAGTGAGATTGTT +AAAATGTACCTAATGGCACATAAGAGCCCTCGAATATTTCAAAAACCCTCCATTGCATCA +GGTAACCAAGTATGCAATCCTATTTGTGCAGATTTTCCAACAGCGCCAATAAAAAGTAAA +ATACAAATAACAGTTATGGCATGAAATCTCATATTGCAGAAAATGAAATAATGATGGGGT +TCGGAAAAGGCACTAGCACAAGCAAAAATAGTCGAAAAGTCTACTGTTTGAAAGATAGTG +AAACAACCCATAATCCCGAGAGCTAATCCGAAATCACCTACTCGATTGACAAGCATAGCT +TTTATAGCTGCTTTATTGGCTTGAAGCCGTGTAAACCAGAAATTAATTAACAAATATGAA +GCGAGACCTACTCCTTCCCATCCTAAAAATAATTGAATAAAGTTATCTCCAGTAACTAAC +ATTAACATAAAAAAAGTAAAAATAGATAAATAGCACATAAATCTAGGGCTATGCGGATCC +TCGGACATATATGAAATAGAATAAAGATGAACTAAGCTACTTACAAATGTAACCACAATT +AACATAACTACAGTCAAACTATCAAACACAGAGTCAAAATTTTTACTTTACTGGGGCCTT +AAATCGCAGAATCAAACGGTAAATTTTTCGCGTACCGCAATGCGGCGGCCATTCACCCAA +GTGAAATAATAAAGTGCTCCCCGAACCGTGCGAGATGGTTACCCATCACACGGCTCACCA +ACTTGAGATTGTGATTAAGGCTTGGGGTAACTACTGTATGTTTAAGCAGGCTGCAGCAAA +AAATCTATTTTTTTTTTATTCGCTGCATATTTTTTTTTCATTGCCTAGTCTCATTCGAAG +GTACAGTGCCGCTTACCTTTGCCACTCAAGCGTACGGCATCTGCCGACTTAGGCCTCTTC +CCTTTTGCCGATCTCAGTGTTTTCTCAAAAAAACTCGCAGCAAAAAAATTTTTGAATATT +CGAAGCTGCGCCCTTTCGAGTAGGCGGGTACTACGAGCCCTCTGTCCCACACATCTCTAT +CTATAAAAATGTGTGGTTCACCGGTTTCACCGAATACTCTATTGATGTCGAGACATAGGT +GCGCTTGAAGTGGTGTGCTGTCCTTATTGGACTCAGGCCCCCCGTTTGTTCGGGCATATG +CGCCCTCTTGCTGCCCATATGCAGGGGGAAACGCCGTGTTATCTAGCCTCTATTACATAA +GGCCTCGCCTGATGCGCCAAGTTTGGGATACCTCCTCCACCTTACGTTTGTGACCTATGG +CCTCACCTGTGTCTCAAAGACACGGTCGGGGCACAGCCAAGGATATTTTTGGCTACGTCC +AGTATGCCCTTTTCCCGACATGCTATGATGCTCTAAGGGAGTTCACCCCATAGAGTGAGT +CGAGTCCGTCTCACCGCAGAGCAACTGCAGCGTCCGGATAATCTATTTATCCAGCAATTC +ATCCAGTGACTTCACGGTCGCCAAAGAAGCCCCAAGAAGCATCAAACATCTCGGAAAAAA +TCCATGGAGCAATTTTTATATAGCAAGCACTAGCTCCCAGTGCAACTTCATAAAAAGCAA +TCAGAGATAAAATAAAAGATAATGAAACACACGTGGTTGTGACTATAGCAGTTCCTCGTA +AACCAAGAAAACGACCAAAAGCACCTGCTACACAACTACCTAATAAAGGTAAAGTTACAA +TTAATAAATACATACATAAATCATTTTTTTTTTTATTGTGACCAAAATACAATCGATTGG +GTAGATAATTGATTGTATTTTGGGCGACAGCCGCACAAGCCAAGACTTAGATGAAGGCTC +TTTTAATTTTAATAAATTGACGACACAGCCTAACAAAGCGAGTTTTCTTTTAGCGCATCC +AATAGAGTTAGCCGCATGCCATTACTACATAACGACATAATTGAAAGATAGGTAATCTTA +GATCACTCCATTTTATTAGTAATCCACTTATCATCTGCAACAAGTATCAAAATTTTGTTT +TAGTAAGTGCCTTCATTATGCAAGAACTACGGAAAAAAAATCTTTTTTTTTCACACAGCG +GGCGTAACAGGCGTGGCTACGCCGCTGTTATCACTCTATCGGTCCTTGTGGAAGCTGATG +GCTTATGCAATCAATATTTTGCTTGGCAAAAGCTCATAAAGCCGTTTGGATAGTAGAAAA +GAATAAGGCGGACAAAAAAAAAATTTATTCTTCTCTTCCACTTTAGTCGCAAAGCAATTC +AGCAGGGAAGAAGAATAACCCCTGGCTAAACGAAGCCTTTATTTGCTTTACGAGGACGGC +GGAAAATAGGGGGCTGGGGCCCTTAGCTTCAAGCACAATTGCGGAACCATAGAATTAAAA +AAAAATATATATATATTTTTTTTTACTTCTTCGCCAACTTATTATCTCTTACTATATTAT +ATTATATAGATGGCAAAACTACGTAGAACAAAGCTCAAAAATTTTTTATTCCAACCTTTG +CACTTTATTGCTTTTCTCTAGCCTTCAACGAAAAGAAGCATTCTCTTCTTTTAGTTCTAA +ATAAAGTCAATAGAGGCTTACTTTTTTATTGGAGTATTCTGCCTGTCATACAAAAGTCAT +TGCCATTGCCAAGGTTTTTGTGACTATGGTTAGATTAAATTGAGTCATTTTTTCGGCACT +ATCTCAGATTTAAGATAGACTAGTATAAATCAATAAAGAAGGGGTCTCTACACACCTCAA +GGCAGAGGGCGCAGCAAAATATAGATTTTTTCAAATATTTGAAAAAATGCCGCAGGTCCG +GCCGAGCCGGAATAAGCCGGGGGTGTCTATAAAATGAAATGGTGAAAAGAAAACGACACA +GAAAGATTGTGTTTCCACTCTGCGCTTCGCTTCCCTAAGCTCACTTGGTGAAAATGGTAA +ACACGACAGACTTAAAATCTGTTCCTATGGGTTATCGGTTCAAGTCCGATAGTGAGCATA +CTCGCTTGGTGAAAATGGTAAACACGACAGTCTCAAAATCTGTTCCTATGGGTTATCGGT +TCGAATCCGATAGCGAGTATCTTAATGTCTGAATTGAAAAGAAGGGCGAGTATAACTTAA +TAGGTGAAAGTGTCAGATTGTGAATTTGAAAACACGGGTTCGAATCCCGTTATTCGCCAA +AAAAACAGATCATCCATTAGCTTAAATCTTTACAATCTTTGAGGGCGCTGCTTTTCGCAG +CAAAAAATATTTAAAAAAAAAAGTTTCGCTATAAACTACGTGCCCCAGCTCTGTCAATCA +AGCCTGCGGCCTTGATTAGCAAAGTGGGGCGTTACTGGGTAGTTAGCCTCTGGTGGTGAC +GTCACCCCATTGGATCCTTCCCTTTCTTCTCGTATAGTGGATTGAGCATAAAAGGATGGG +GCGTTAGACATTGCGCCCACGCTTGGATATCATCAGCAAAAATGGCTGATGGATCACTCT +GCTCGTATTAGCTGGTCCAAACAAGAACATAGAGAAGTATATGGGTAAAAAATGAGCTCA +AAAAGTTGTTGAAATACTGATGTTGTTTCTAAATTAGCAGCTTTTTTTATATCCATATGA +TTGACCGAAGTAGATTGAAGTTGTCCGTATAATAAAACTAGATTTTGGTTGTAGAAGGCT +GAAGGTAACAATTGTGACCATGTATTATTTGGTGCTTCTTCAATTAAGTACAAGATACAA +GTGGGACCTGCACTAGAAGCGAGCTGCTCAATAAAACCACCTTGCTTGTTTTTATGTGGT +AGTTTTCCTTTGTAATTTGGTTGAAATAAAGTTCTACCTCGAAAGGCACACAATATATTT +TTGAGTTGTCGCCATTGCCGACTTGTCAAGCCACTACAATGGAATAAAAGAATATATGGA +GATTTTTTTTCAATCTCTTGGGCTTTTTTCCGTATAACAATTTGTTTTATTAGCATAGGA +TCATTATTATTATTTTTTTTCTAGTTCCTTTTTTTCTTATTTTTCAACATACCTTAAATT +TGAGTAAGTGATGCCGGGTTTTTTTTTACATCGAACAAATGCTATGTTTGTTAATATGCT +TTATGTTTTCTGAATAATAAACCGCGTAACACAAATAGTGGAGGTGAGGTCAACCTTGCG +TCGTGCTACACGACAATTTTGTTAAGGCTTTATTCCAAGCAGCTGCCTTCCGTACTGGCA +GCTTTGACAAAAGGATCACTTCGGGAATATATTCGTCTTGAATTCATAAGACTAGGAGTT +TGAAAAATTGTATGACAAGACATCGGCTCAAATAGCTACCCAAGAGGAAGCTCAGTGAAT +CCCTAGACCTACAGGTGATGCGTACCGTGTAACAGAGAAATATTTTTTCTTCAAACCTAC +TATCTTCAGCCATACTAATTGCCCCTTCTTGCTTTTCTGAACCTTTGGCTAAAAGACACA +AGGCTCGGCCCCGGCCCCATATTCCAGCTTATTTCCGCGATAGGAGCATTTATGCGTTTC +CTGATGATGAAATTGAAGCCTGCGTTAAAGCCCTGCTGCGCCCTTCAGCCTCAACAAGTA +TAAGAAGTAAGTTGAATCTTTTAATTATCCGCCAGGGTTGAAACTACTTGCGCGGCCTGA +GCTATTTAACCTTCACCTCGCTTTCAGTTACTTTGTATCCTTATATGGAAGCCCTTTATT +ACCAAATACTATGGATATATGTCCCTGCTATTTTCTTCTATTCCTCAAAAGCTTCTTATA +GAATTAAAACTTACTTAGTGGGTGTTTTTCTTCTGTCCATGGCTAATAAGGTGTCACGTC +CAGAATTGTTATGACATTTTATAACTTTCTTTAGGGCTTTAAAAGAAGCCGAAACAGCGC +CTTCGGCCCTGCCCTTCTCATAACTACTTGCCCTTCTCATAACAAACAAAAAAGGAGGCC +CGACAAAGGGCCGAAGGCGCCGGCCACCATCCAGGCAAGCTTTGAACTTTCGCCAATAAA +TAAAGGGCTTTAACTCTGGCGCATATTTCACAATACCGCACAGAAAGGCGGCGCTGCGCA +CCCTCCCTTAGAACTACTAAATCTCCCTACTCAACCTAATAATGTTGCTTGTTTTAATGG +CTTTTTCTTTATTGCTCGAATAGCGGATATGGCAAGAGCGCGCTATATTGCTATATATGA +TGATTCATAGGATTTATTCCTTAGATATTTGACTAAGCTCCGAGCCTAACCAATAATAGA +TTCTGAACGGGTGTCCTTCGTCCGGCCTGCTTTGTTCCATTTCATAGCGGGGCTATTTCT +GTTTTATATTACTTTATGTGACGGTTATATGTCATAGAGTGGACAATCCCATCATCACCG +CTATGAAATGTTTAACCTGCCCAATTTTTTGGCAGCGGCAAGAGCAAAGACAGGAGCTGG +CCTTTTACCAGGCCAGCTTATCTTTATAATCAAGTATCTGGCTCTTTTTATAACTTTTAC +AAGGTTATTTTTTAAATGCATTTCCCTATTCATTCAATAAGGTGTAGCTGTCAACAACCT +ACACTACAGTTAGGGTCATTAAGGCTGAATGACATCTGGTTCTTTTTTCTTTATTTAGGG +CTTCGCTCTCTAAGGCGATAACAGAAGCCATATAAGCTTCTACTATAACATCGCCATAGG +CATACGCTACAGAATGCTTATAGGCTCTCCTAACCTCGAGAACCACGTACAAAAAGGATT +TGAGAAAAATAACGATATATCCGTACACAAGCTTTTCAACCGTACTTCCTTCGCGTAAAA +TAATTGCCAATCAAATATAGAGGCAAGATTGAACCAGGGAAGGGCGCCTCCTTTCACATC +TTTCTGCAGCATCCTTCGTTTTTCGACCGAAAGACTTACTTAGTACTCGGCTAAGTTTTG +TTTGGTTAGCTGTAAGGAAGCCTCTTTTAATGTCCCGTCATACTGCACCCGAGAAAAGCA +AAGAGAATACTACTTAGGTATAAGCACGCGCTTGCAAGCTTCGCCCTTTATTTTATAGTT +AGTTTCCTGTTGGGCTTTATGTACGCTTTAGATAGAATTCTATATCTGCTTTTGCGTCAT +AAAATGGGATCTTACATCTTCGTTGTTGCGATATCGCGATGCCCAGCACGAACCTGTTGT +TTTTTGAACCAGGTTAGTAATAAAAGCAATTCAAAAACTGTGAGTGCGAATAGTCTTTCC +TACTATACTAAAAAGCTTGATAAAATAAATAGGTTTCCGTTTTGGAGACGAGAATAATAA +TAATTAGCCCAGAAGGAATAATATCACAGAACAAGTGTTTTGTGTACCCAAATTACGCAA +ATGCATAATAAAGCGGTATATTATAAATGGCGATATATTTGCAGCAGTCTTCGATTAGTA +GGCCGTAGGAAGTTCATTTTGGTTGAGTTAGCACTTCTTGCTGGCCCTCTCACTCTGGAG +AGAGAGCTGGGCCCTGCTGCTGGCGTCATCGATGTACATTATTACATTTCGAATCAAATC +TAATTTTTCTGACAGCGTACGATTACTATAAGCTAAGGTTCCTGCTACTTTCATTAAATT +AAAGTTCGGTTCGTTTTCCAAAACCTCATAACAATCCAGCAGGCTCTTGAATAGACACGA +TAATAACCCTAAAAACCATGATACTTAAATGAGAACACATCTTAGAAATGCTATAATTTC +ATTATGGACTGTCCAATTTTTTATGTCAGAATAATCATTTTGGAACACCCTCTAAAATCC +AATGATGAATAAAAGATGGTCCAGAAAGAAATACCACAGTTGTAACTATTCTTACAGTAA +TTTTAAAGATAACAAAGGGCAGCCAAAAGATCCAGTAGCTTACCATATTTTTAAGTACTC +TACCAGAGAATCCACAGAAATGTAAAGAACTTTTTTTATAACAATATTCTATGATAAATT +TTTTTTTACTTTCCGTAATTAAAGCAGATCGCAACAATCTCTGGTTAAGAGGTTCTCTCC +GGAGAGCCCTGGGCGGGCTCCCTTGCTTGTCCCCCACCCGTGTGGGTGTGGGTGTGGGGC +AGGTCTCCACTGACGAAAACAAGTATTAATCTTACATTTTTGCGCTTCGCACATACATAA +AAAACAAGAAGTAAATAATTAGTACGAGTAAGCTCAACAAGAGCCGTAAGCCTTGTTTCA +AACCTCACGGTGTTTACACCTCTCGCCTATCAAAGTGATGGTCTTTCACCTTTTATGAGA +ACCTGTATAAAGAAGGATTTCCCGCTTAGATGCTTTCAGCAGTTCTTCTATACCAACTTA +GCTACCCGGCGCTGCTATTGGCATAACAACCGGTACACCATAGGTTGACCCAACCCAGTC +CTCTCGTACTAGGGTTGGCCTCTCGCAGTTCTCTTTTAAACACCAACGGTAGATAGGAAC +CGAACTGTCTCACGACGTTCTAAACCCAACTCACGTACCACTTTCATCGGCGAACAACCG +AACCCTTGGGACCTTCTTCAACCCCAGGATGTGATGAGTCGACATCGAGGTGCCAAACGA +CTCCGTCGATAAGAGCTCTTGGGAGTCATCAGCCTGTTATCCCCGGCGTACCTTTGATCC +GTTGAGCGAGAGCCCTTCCACGCGGGACTCCCGGATCACTATGGCCGACTTTCGTCTCTG +TTCGACTGGTAAGTCTCACAGTCAGGCAGGCTTGTACCATTACGCTCTAAAGCTGTCAGA +ATATTAGCTTGAGCCTACCTTCGCACACCTCCGTTACTCTTTGGGCAATTATGTTCATTT +ACTCTGCAACATACTTGTCGTTGTATTTGATTCAATTGAAGTGCTTGCTAATATCTTTAT +TCCTTTTTCTTTAGTCATTTTGTTCATTTTTCGTATGGTTATTAGCAGTTTAAATGTCCT +ATTACTGTTTCGTAGTGAACAGTGCTTTAGGCTTCCAGTTTACTGGAAGTTCAGACTGTA +TCATTATCCAATCATTATTGCATTGGATACCTGGCATGCAGTCGTTGAGGGAGCATAAAT +AAATTTTTTTTATTTTCGCTACAATGTAGTGGCTTTTTTCAGCCTTCCCTGCTGATTGTC +CAAGGAATGGAGATCTCAGCATATAGCCAGATTGGCCTGGGATGTTACCATCCAAGCGCC +CCGATTTAAAGGCATCCGCCCCAGATAAACTAACCACCATGCAATGTCCCGCCTCTTTTT +CGGCAGTTAGACATCCTTAGACGAAAGAGTGGTATTTCAAGATTGGTGACGACGTTGCAC +GTCACCACCTCCCACCTATCCTACACATTCAATCAAGGTTGTCACTGCAAAGCTATAGTA +AAGGTGCACGGGGTCTTACCGTCTAGCCGTTGGTACTCCGCATCTTCACGGAGAGTTCAA +TTTCACTGAGTCCATGTTGGAGACAGCAGGGCAGTCGTTACACCATTCGTGCAGGTCGCT +ACTTATGCGACAAGGAATTTCGCTACCTTAGGACAGTTAGAGTTACTGCCGCCGTTTACT +GGGGCTTCCATTCGAAGCCTATAACACTTCTCCTTTTCACCTTCCAGCACCGGGCAGGTG +TCAGACTCTATACATCGTGTTACCACTTAGCAGAGTCCTGTGTTTTTAATAAACAGTCGC +TACCCCCTGGTATGTGCCGCTTTCCTAATCTAAGGATAGGAAAGCACCCCTTCTCCCGAG +GTTACGGGGTCATTTTGCCGAGTTCCTTCAACATGGTTCTCTCAAGCGCCTTAGTATACT +CTACTTGTTCACCTGTGTCGGTTTGGGGTACGGTTCGTTTTGCACTGGAAGAATCAAGAT +TCTCCCAATTCCACCAAGTTTTTTCCTGGAAGTCCATTGGCCTGCTTGATCATCCAATTC +ACGAGTTAAAGAAAAAACCCGTGGTTTAGTAGCCAAAGCAACTTCGTCACTTTTGTGTAC +CCATCGGATTAAGCCCTTTCGGGGGTTCCTTAGGGACCGATTCACTCTGCGTAGATTTAC +TGAACGCAGAAACCCTTGAACTTTTGGCGATCATGTTTTTCACATGATTTATCGTTACTC +ATGTCAGCATTCTCACTTCTGATATCTCCAGGTGTTGTCACCAACAACCTTCTTCGATTT +ACAGAACGTTCCGCTACTGACACTTGAAAAAAAAATATTTTTTTGCACTTTATTATGGGT +GCGCTTTGCTTACCCGAATAAGCCCCACTGCCGATACGCATAAGGGTACTGTAAGCGCGG +AGCATGCAATTACGCAGTAATTGCATATGGTGGGTGCTTTCAAGGTCTCGTCGCTTCGGT +GAATCACTTCAGCCCCGATACATTTTCGGTGCTATGAAGCTAGACCAGTGAGCTATTACG +CTTTCTTCAAAGGATGGCTGCTTCCAAGCCCACCTCCTGGTTGTCATCGCTCGATTACTT +CCTTTTCCACTAAGTGATTGCTTAAGGACCTTAGCGTACGATCTGGGCTGTTTCCCTCTT +GACTTCGGATCTTAGCACCCAAAGTCTGTCTGTACAAAATCATGGCCAGTATTCGGAGTT +TCCTTGGGGTTGGTCAAGCTTTGGGCCACCCTAACCCATTGAGTGCTCTACCTCAGGCCA +TAAACATTATACGCTCTACCTCAATAGATTTCGCGGAAAACCAGCTATCTCCGAGTTTGG +TAGGCCTTTCACCCCTAGCCACAAGTCATCCCCGTATTTTGCCACATACGTGGGTTCGGT +CCTCCAAGGCCCGTTAGAGCTCTCTTCAACCTGCTCATGGCTAGATCACTCAGTTTCGGG +TCAAATAGAAACAACTATATTCTTTATTTACTTTCAACTTCATTGCGCCTACACCTAATG +GCTTAAGCTTGCTGTGTCCATTTACTCGCTGACCCATTATGCAAAAGGTACGCCGTTAGA +GTGGAAAGTTCTGAATAACGAGGCGCAGAAGAGAACGCAATCTCTTCTTTCCCGCTGTTC +AGACTTAGCCTGCTTTGTCCTTCGACTGATTGTTTGCATCGGATTTCAGGTTTTCTATTG +CACTCCCTTTCTTAGGGTTCTTTTCACCTTTCCCTCACGGTACTTGTACGCTATTGGTCA +TTGAGGAATACTTAGGCTTAGAGGGTGGTCCCCCTTGGTTACATAAGAGCAATCATAATT +CAAACACAGTATCCGCGTTTTACTTATCGAATTGAACCATAGGAAAAAATCTACAGGGCT +ATCACCTTCTTTGGCAAGATTTTCCAACCTTTTCACAATTCCTTGAATGGTCTTTCCATC +ATTCAATTTCAAACAAATTGAATGAAGAGAGAAGGCCGCAGGAGAGCGCGCAAAGCGCTT +TCTCCTGCGGCCTTCTCACAAAGCCTAATCCGCTTTCGCTCGCCGCTACTAACGGAGTCT +CGGTTGATTTCCTTTCCTTTAGCTACTTAGATGTTTCAGTTCGCTAAGTTTTGAAAGTCC +AAGGCGGAACGCAGCACACTAATGCGCCGCTCCGCTTGGATACGGTTTCCCGATTGGAGA +TCCATGGATCACAGACGGTATCTCCCCATGGCGTTTCGCCCTTGAAAGCGTCCTTCCTTC +TCAATGCCTAGGCATCCATCCGATGCATTATTTTGAATACGGTAGGAATTGCACCTACTC +CACTAGTCACTACCAAAATATTCGCCAATGAGGCTTCTATTCATACTTCAAAATGGTGGG +CATGCCTTCTGATCAAGTCATTTACTGGAGCGCCTTCGAAGGTGCACTCGGGCTTGAAGA +CAAAGGTTTATGCACTTTGTGCATATTTTTACAAAAAAAAGAAAGATAAAGAACCTGATA +GAAAAGCATACGAAAATCCAGGCCACTAAAATAACTTAACCAATCCACCCTTTAATTATC +TTGTAAATTACTATTCCAAATCATCAATATAATACATATCTTATATTTACATATCGCATT +GTTCGCACTACAAGTAGAATCCGGGTTTCCTTTCACCTAAATCCAATAAATGAGCATTCA +CCTTATCATTTAATTCACACCATTTATATAAGTGAACATTTTAATATTTTCATACCTGAA +AGATAAATCAGGAGCGCGCAGAAACAACCAAATGATGGATGCACTGCTTTCTAAATTCCA +CTGTTCAACAAAAACAATTTATAACATTTAGCGGGAGTAAGATTCGAACCTACAACATTC +AGATTATGAGCCTGACGAGTTACCAATTACTCTATCCCGCGCACATAATAAAGGCGGCGC +TGCAAAATATTTTTTTTGCGCAGCAGTTATCCGCGCTGCCCCAAGTATAGCGGTTTATTA +TAAATATGTAATTTTTTTATTGCGAGATTTCTCTATGATGATTCATCCATCGGCGTCCAG +TGTTGCGTAGCAATACTTGGCGGGCTTAGTCCTCAAGGCTTCGCCCCCTTTTAGACGATC +TAGTTCATGACTGCGTGCTTGGCGGAACTTGGTCGCGTGTGCTCCTCATTCGTAAAGCTA +ACTAAGCGGGTCCTTCGGCCCATAAGCTGCGAAGCTGCTTTGTTGGGGGATATTTGTAGA +TGCTTTATGGCTCAGTTTATTACGCATGGTCAATGAAATATGTTTCAGCATTATCAGTAT +ATGATGACGGCGCACGCCCGCTTGGAGCCACACCCCGTATTATTATGTATGACGATCATA +TACGATATTGTATTATTATGCATTATTATTATGTACAATATTGTTAGTTTTGTAAATAGT +ATTTATGCTATCGCTAGTACAGAATATTCATTTCCTTTTTTACTCTTACTTTTCCGACGA +TCAAAAAACGAAATCTGCGCCTTCGGCGCTGCTTTTCTTCTTCGGCGCAGCCTTCTTTCG +CAGGCCAGAAAAAGGCACTACCCCCACAAGATAACCTACTACACCTAAATCTAGAGAGTA +TACGGTTCGTTTTGCACCGGATTCTTCCAATTCCACCAAGTTTTTTCTTCGTGCCCTGCC +TGGTCGAATTCGCTTCATGGATCGTATTTATTGAATTCTGTATTTGCTCTAGCGCCCTGG +CAAAAGGAGGTCAAGAAACTATATACGATGAAAAAGCATGGGTTGTTTTTTATTTTTAAT +ATAGTCGTTTGGCAGGCCTATGCTACTATTTTAATAGCGTTTCAAAATAATTTAAATTAG +ACGTTGTCCGGGCTTGGACCATGTCTCCCGAAATTTATAAATCAGTACATATAGCGTAAG +ACGATTTCACATTTCGAGGTCGGAATGGGATCGGGTGTTTTCACGTCTTACCATAGTGCC +CGGAAGCGCGTATCGATTAATGAATAGAGTAAAGAGTGTACTTGCCCAGTCGTAGGTTTC +CTGCATTCAATGGGGCAGACTACACAAGTGCTCTCCGGCGATAGTCGCCCATCACATGGC +TCTCTAACTTGACTTTATTTTGATTTCTTGTAAACCCTTCCGGCCACTCTTTCGAGTCAG +TTGCTTGTCGAAGCGCCCGGCCCCACAAGGCCGCGAGCCACGCGGCGGCTGCACCAAATT +GTTGCCGGTGATGCCGCCGCTGTATGGAATTTTACGTCTTTATCTGATTAGGGGATTTAC +AAGTTACTGCATATATATAACAAGCAGATTTACTTCAGATTCCTAAATAAATCTAATTAT +TGATTATAACTTACATACATAAGGTACGGCCGCTTCTTGGGATGCTTTCAGCGTTGTTTG +TTTTTTACATGGCTTGTCCAATCGCAGGATTGGGCGCCCTGCCCTATGTTATAATGTCGT +TTAGTGCCCCGTCCTCAGTAATATGGGTGCTGCGCTCCCTATCCTGACTTATCCTTTACG +TATGAATAAAGTAATTAAAAGCAAGCCATTTAGACTTAGTTAAATCTATCCTGCGACGAG +CTGGCGAGCAAGCAACGTACATTTGCATGCGCTCCCATTCGCGGAGCTGCAACAATGAAT +CATCTATGATGATTCATAAGCTACTGGCATCGCGGCTAATTCTATTTTGTTGACATAAAA +TGAGAGTATAGCAACTCTTGTTTATGTAGTTTGTGAACAAGATCTATAATTAAACCTAAA +GGTGTACTGTATATGATTGATCGCTTTGTCAGAACAACATTCATTATGTTGGATTTATAG +TTATGCTAAGAAACATCGTAAATCTTTCTTTACCATAGCTTATCATAGAGGGAGATTTAG +GGTAAGGTTAGTGACCAGCAAAAGAACATATATATATCTTTTCTTTGCCTCCCACTATGC +CGTAGGTTAAATTCCTATCGGAAGAGGGATTTGAACCCCCGTGTGCGAATTATGATCCCG +CTGTTCTAACCAACTAAACTATTCCGACATGCGAATTATTATTCCGCTGTTCTACTAATC +TGCCGCTTCCTGGCTGTTGGATGATAATTCGCTTCATGCCCTGTGGCCTGGTCCCGTAAA +AAAGATATGGCTTCGTCATTCTGAACGATAGGAGTAACTGTGTATGTATAGAGTTCAAAC +GGAATGGATGATTAAAGCGCCTTATGCCTTGGTCATATTTATAATTGCGACTGAACTTGT +GACTGCCGTTATTATGTAGTAGTAAAGGCATTCACGAGCGTAATTAGAAAGGCATGCAGT +ACTGCGGGGCACAACGGCTCTGACTGACATAGGCCAAAACTTCAATCAAGCCTTTAAAGG +CTAATATTGCAGCCTTTATGCCAGCCGCTTTACCTCCGGGCTATGTAACTATAGTCCAAA +GAATGAAGAGCACAGGGCGTATCTACAATTTTCAGAAAAAGTAAATATTTTTTTTGTTCG +AGAAAGGAGTCAATCCAGCCACAGGTTCCCCTACGGCTACCTTGTTACGACTTCACCTCA +GTCAAAGGCCCCACCTTGGTATCCCACATTAAACCACCAATTACTCTGGTAGACCACACT +CAACAACGGCATGGAACACTGGAATAATGGGTGATCCTTGGTCTGATGCTTCGGGCGAAG +CCAATTCCCATGGTGTGACGGGCGGTGTGTACAGGGCCTGAGTACATATTCACCGCGGCA +TGCTGATCCGCGATTACTAGCGATTCCAACTTCATGTTCTCGAGTTGCAGAGAACAATCC +GAACTGAGGCCATCTTTCTGGATTCGCTCCGCCTTAAAGCCTTGCTTCCTATTGTAATTG +CCATTGTAGCACGTGTGTAGCCCAGCCCATAAGGGCCATGCGGACTTGACGTCATCCCCA +CCTTCCTCCAGTATATCACTGGCAGTTTCTTGTGAGTGCAGTACATGGCATGGATTGTCA +ATTATTTTTACAAAGACTGGGTGCCACATTGTTATGTGTGCCACGCTATGAGAGCTGTGC +TCGTCGAAGTACCCTTCGACAACTGTGTAGTCGTAGCCACACGGTGTTGTGATATCCTTT +TTTAATGTTGTGGTCTTCGTTACTTTTAAGTGCTCAATAATCGATCCTTTAATCCAAAAC +GCATGTCTTAGCAACACAAAACGAGGGTTGCGCTCGTTATAGGACTTAACCCAACATCTC +ACGACACGAGCTGACGACAGCCATGCAGCACCTGTATAAATTTTCGTACCATCCCATTAA +GGACAAGCAAACTTATTCATATGTCAAGGGCTGGTAAGGTTTTGCGCGTTGTATCGAATT +AAACCACATGCTCCACCGCTTGTGCAGGCCCCCGTCAATTCCTTTGAGTTTCAGCCTTGC +GGCCGTACTCCCCAGGCGGAGTGTTTAACGCGTTAGCTGGGCCCCTGATCAGCCATTTTG +CATACAGCGCAGACCAAGGACGAACACTCATCGTTTACGGCATAGACTACCAGGGTCATA +TTGAAGATAGTTTTTCGCCAGCATTGAGCCCTTGTTCAGTGGCTTAGCTGCTTCTCGAGG +TCATGATGGAAAAGAAATAATCCCATCACGATCACGTAGTGATGATTATCTTCAAAAATG +GACTATATCATTCTCTGAACTCCAATCTTTGCAGTTGCTTTCTTTTTTATTGGCTAACTA +TTGGTTAATGCACTTCAAAAAAAGAGGCAGAGCTTTCTTCATTTCAGAGATCCAGCGTTG +TTGAATTTCAGGATCTTTGTAAACCAATAATATGGCTTTCCTGGGCATGCTCGCCACGGG +AACATGTGGTAAGATCATTCAAAACAGTCTCTTAAGTTCCTCAATACCAGACCAAAGGCG +GCAGTAAGACTCTTGGCTTGAATTACCGTACTTTCGATTTTTTCGGATAGGCCCATAAAC +CTTCCTTTTGAAAAGTGTCTAAGTCTCGGGCTAAAAGCTTAACATATTCTTTGTCGAAGT +CATCAGTAGCCACTTGTTCTGGCCAAAGCCATCGACGGCTTGCCCCTGCACGGAACTTGG +TACCAAGATTTCATGCAGTGAGCCTACTTTCCATCCACCGAAAGTAATTTTCCTTCATTT +GGCTATAACGAACGGAGAACCTTGCCATCTCTTAGGAGTTATCCTAAGATAATTGATTTC +ATTAAATCCGACAAAGATATTTGCTTGAGTCTCTTCAAGTGCAGGTTAGGAGCTTGAGCT +TTGCCTATTTTTCTTTATATCTTCTATTTCCCTATTTCCATAGTCTCTACGGGGTGCTCT +TGATCCGAGATTAAAGCACTTCCCTCGGGATTGTCTGATGTTAAATGGTACATTCACATC +AAAGTTTCCCCGAAATAGCTGGATGCACACTCTGACCTCGCGATCAGAGGGGACACCTTT +GACCGTGTTTCATTTAGAGCCCGGCCGGATCTATCTAATCCTGTTTGCTCCCTATGCTTT +CGCACCTCAGCGTCAGTAGAGACCCAGAAAGCTGCCTTCGCTTTTGGTGTTCCTTCGTAT +ATCTGTGAATTTTATCTCTACACACGAAATTCCACTATCCTCTATCTTACTCAAGTGAAT +TGGTTTTGAAAGCATTCCGCCAGTTGAGCTGGCGACTTTCACTTTCAACCGGATTCACCG +CCTACGTGCCCTTTACGCCTAGTCATTCCGAATAACACTAGCCCCCCCCGTCTTACCGCG +GCTGCTGGCACGGAGTTAGCCGGGGCTTCTTATTCAAGTCTTGTCACAATCGCACACTCG +ATGAAAGAGCTTTACAAGCTGCGTTGCCCTTCTTCACTCACGCCATATTGCCGGATCAGG +CTTTCGCCCATTGTCCAAGATTCCCCACTGCTGCCTCCCGTAGGAGTCTGGGCCGTGTCT +CAGTCCCAGTGTGGCTGATCATCCGAAAAGACCAGCTAAGCATCGTAGGCTTGGTCAGCT +TTTACCTAACCAACTACCTAATACTACGTGGGCTCATCAAACAGCGCTTTTTAGCTTTCA +TTCATTCAGGATTTGGCCCAAACTGTTTGGCAGATTCCCACGTGTTACGCACCCGTTCGC +CACTTTGTTTTCATTTTGACTCGAAAACAACGTTCAACTTGCATGTGTTAAGCATATCGC +TAGCGTTCATTCTGAGCCAGGATCAAACTCTTTTTTTTAAGTATGATTTTTTACACAGAA +GTAGGTTTTGAACCTACCAAACTTCCAATATAAAACCTCTGCGTATCACTAATTAAATCA +TTATCATACTAAAGTTTACTACCCATAAACCAAAAAGGATTCACTATGTAGAACCTTTGG +TCCAATCAACTTGTTATGCTTTCGCATTACGTAATTACGTAGTGATTGCAAATTGATTGC +AGATTGCCAGTATGCTATGCTGTATGGAGTACCACGGTCTTCACTGTAGGTGGCCTTGTC +TAGCTTTCATCTTTCAATATAAAGGTGAATAATTTTAATAAAGAACAGGGCGCGAAGCGC +ATCTTTGTTGCTCTTGATCCTAATTCCCAAATTCCCTTTTTTGCTGTATCCTGCTTCGAG +GCTCTGCCAACATTGCAATAAACCTAGTTGTTCACAGTGAAGCGAAGCAATGACGTAGAT +TTCGACGAAGGCTTCGTTTCGGTTTTGATAAGAATTATATTTTTGGGCTGTAGTTTTTTG +AGGCTGCAAGATTAATCGCAATTTTGAATCCAAGCCGTCGTTATTTATCTATGATTATTC +ATGGCTATTCCTCATGGAATTACTTAGTTAATAACATAATTAAAAAATTAATAATCTATT diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out new file mode 100644 index 00000000..bbd100a7 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_checkm_qa.out @@ -0,0 +1,51 @@ +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Bin Id Marker lineage # genomes # markers # marker sets 0 1 2 3 4 5+ Completeness Contamination Strain heterogeneity +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + bins.24 k__Bacteria (UID203) 5449 104 58 0 16 12 9 2 65 100.00 752.93 5.58 + bins.2 root (UID1) 5656 56 24 0 0 0 5 5 46 100.00 654.08 15.82 + bins.32 k__Bacteria (UID203) 5449 104 58 8 6 33 55 2 0 98.75 159.64 34.29 + bins.14 k__Bacteria (UID203) 5449 104 58 1 49 48 6 0 0 98.28 51.83 37.88 + bins.40 k__Bacteria (UID2982) 88 230 148 4 220 6 0 0 0 97.30 3.38 33.33 + bins.20 p__Proteobacteria (UID3887) 1487 259 162 10 249 0 0 0 0 95.68 0.00 0.00 + bins.1 o__Rhizobiales (UID3642) 107 485 316 17 461 7 0 0 0 95.46 1.74 42.86 + bins.23 k__Bacteria (UID2982) 88 227 146 37 43 57 65 20 5 94.27 138.49 24.88 + bins.11 k__Bacteria (UID3187) 2258 188 117 13 173 2 0 0 0 92.90 0.93 50.00 + bins.12 k__Bacteria (UID3187) 2258 188 117 11 175 2 0 0 0 91.45 1.71 0.00 + bins.21 k__Bacteria (UID3187) 2258 188 117 54 132 2 0 0 0 90.86 1.71 0.00 + bins.28 k__Bacteria (UID203) 5449 104 58 28 16 46 13 1 0 90.50 91.23 60.44 + bins.17 p__Actinobacteria (UID1454) 732 200 117 50 148 2 0 0 0 80.85 0.71 50.00 + bins.19 c__Gammaproteobacteria (UID4202) 67 481 276 117 304 54 6 0 0 79.71 16.20 23.61 + bins.37 k__Bacteria (UID1452) 924 161 108 31 128 2 0 0 0 77.31 1.01 0.00 + bins.22 k__Bacteria (UID2982) 88 230 148 52 157 21 0 0 0 76.58 7.22 19.05 + bins.31 k__Bacteria (UID203) 5449 104 58 47 35 19 3 0 0 74.61 37.38 50.00 + bins.29 o__Actinomycetales (UID1696) 455 311 187 88 135 63 18 7 0 72.12 41.74 12.58 + bins.43 c__Gammaproteobacteria (UID4202) 67 481 276 170 301 10 0 0 0 67.30 2.81 20.00 + bins.13 k__Bacteria (UID203) 5449 104 58 52 41 11 0 0 0 66.07 17.24 36.36 + bins.33 k__Bacteria (UID203) 5449 104 58 26 52 25 1 0 0 63.79 8.12 10.71 + bins.38 k__Bacteria (UID3187) 2258 188 117 92 95 1 0 0 0 58.97 0.85 0.00 + bins.42 k__Bacteria (UID2982) 88 230 148 122 100 8 0 0 0 55.61 4.73 0.00 + bins.15 k__Bacteria (UID203) 5449 104 58 43 33 20 7 1 0 55.02 39.50 38.30 + bins.7 k__Bacteria (UID3187) 2258 187 116 97 86 4 0 0 0 54.13 3.45 0.00 + bins.16 k__Bacteria (UID203) 5449 104 58 70 24 10 0 0 0 50.00 13.79 0.00 + bins.30 k__Bacteria (UID203) 5449 104 58 65 34 5 0 0 0 48.90 5.17 40.00 + bins.6 c__Alphaproteobacteria (UID3305) 564 348 229 198 148 2 0 0 0 45.45 0.66 50.00 + bins.41 c__Gammaproteobacteria (UID4202) 67 481 276 255 205 16 5 0 0 38.95 2.73 35.48 + bins.8 k__Bacteria (UID203) 5449 104 58 71 33 0 0 0 0 38.40 0.00 0.00 + bins.45 c__Alphaproteobacteria (UID3305) 564 349 230 223 106 20 0 0 0 38.17 6.75 40.00 + bins.46 c__Gammaproteobacteria (UID4202) 67 481 276 306 174 1 0 0 0 37.54 0.36 0.00 + bins.39 o__Rhizobiales (UID3654) 92 481 319 320 157 3 1 0 0 35.08 1.41 16.67 + bins.34 k__Bacteria (UID203) 5449 104 58 84 20 0 0 0 0 24.19 0.00 0.00 + bins.18 k__Bacteria (UID203) 5449 104 58 82 22 0 0 0 0 23.67 0.00 0.00 + bins.10 k__Bacteria (UID203) 5449 103 57 94 9 0 0 0 0 14.91 0.00 0.00 + bins.27 root (UID1) 5656 56 24 54 2 0 0 0 0 8.33 0.00 0.00 + bins.5 k__Bacteria (UID203) 5449 104 58 99 5 0 0 0 0 5.17 0.00 0.00 + bins.47 root (UID1) 5656 56 24 55 1 0 0 0 0 4.17 0.00 0.00 + bins.36 root (UID1) 5656 56 24 55 1 0 0 0 0 4.17 0.00 0.00 + bins.35 root (UID1) 5656 56 24 55 1 0 0 0 0 4.17 0.00 0.00 + bins.3 root (UID1) 5656 56 24 55 1 0 0 0 0 4.17 0.00 0.00 + bins.26 root (UID1) 5656 56 24 55 1 0 0 0 0 4.17 0.00 0.00 + bins.9 root (UID1) 5656 56 24 56 0 0 0 0 0 0.00 0.00 0.00 + bins.44 root (UID1) 5656 56 24 56 0 0 0 0 0 0.00 0.00 0.00 + bins.4 root (UID1) 5656 56 24 56 0 0 0 0 0 0.00 0.00 0.00 + bins.25 root (UID1) 5656 56 24 56 0 0 0 0 0 0.00 0.00 0.00 +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv new file mode 100644 index 00000000..9f4d96cb --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.ar122.summary.tsv @@ -0,0 +1 @@ +No Archaeal Results for nmdc:wfmag-12-fxwdrv82.1 diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv new file mode 100644 index 00000000..f18be971 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_gtdbtk.bac122.summary.tsv @@ -0,0 +1,15 @@ +user_genome classification fastani_reference fastani_reference_radius fastani_taxonomy fastani_ani fastani_af closest_placement_reference closest_placement_radius closest_placement_taxonomy closest_placement_ani closest_placement_af pplacer_taxonomy classification_method note other_related_references(genome_id,species_name,radius,ANI,AF) msa_percent translation_table red_value warnings +bins.1 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Xanthobacteraceae;g__BOG-931;s__ N/A N/A N/A N/A N/A GCA_019075925.1 95.0 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Xanthobacteraceae;g__BOG-931;s__BOG-931 sp019075925 79.24 0.52 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Xanthobacteraceae;g__BOG-931;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_003164375.1, s__BOG-931 sp003164375, 95.0, 78.29, 0.34 92.75 11 0.9596061802260266 Genome not assigned to closest species as it falls outside its pre-defined ANI radius +bins.11 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__SbA1;g__PALSA-188;s__ N/A N/A N/A N/A N/A GCA_003169715.1 95.0 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__SbA1;g__PALSA-188;s__PALSA-188 sp003169715 75.76 0.05 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__SbA1;g__PALSA-188;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_019241735.1, s__PALSA-188 sp019241735, 95.0, 76.42, 0.06 86.89 11 0.9158913290576843 N/A +bins.12 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Palsa-295;s__ N/A N/A N/A N/A N/A GCA_003131985.1 95.0 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Palsa-295;s__Palsa-295 sp003131985 88.57 0.77 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Palsa-295;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_003153585.1, s__Palsa-295 sp003153585, 95.0, 84.66, 0.82; GCA_003167215.1, s__Palsa-295 sp003167215, 95.0, 84.57, 0.76; GCA_013287135.1, s__Palsa-295 sp013287135, 95.0, 79.11, 0.33; GCA_013289715.1, s__Palsa-295 sp013289715, 95.0, 77.79, 0.22; GCA_013289695.1, s__Palsa-295 sp013289695, 95.0, 77.6, 0.24; GCA_003166795.1, s__Palsa-295 sp003166795, 95.0, 77.44, 0.16; GCA_013286165.1, s__Palsa-295 sp013286165, 95.0, 77.34, 0.15; GCA_003156635.1, s__Palsa-295 sp003156635, 95.0, 76.73, 0.16; GCA_003131705.1, s__Palsa-295 sp003131705, 95.0, 76.38, 0.11 92.51 11 0.9840641626029654 Genome not assigned to closest species as it falls outside its pre-defined ANI radius +bins.17 d__Bacteria;p__Actinobacteriota;c__Thermoleophilia;o__Solirubrobacterales;f__Solirubrobacteraceae;g__Palsa-465;s__ N/A N/A N/A N/A N/A GCA_003137075.1 95.0 d__Bacteria;p__Actinobacteriota;c__Thermoleophilia;o__Solirubrobacterales;f__Solirubrobacteraceae;g__Palsa-465;s__Palsa-465 sp003137075 81.98 0.63 d__Bacteria;p__Actinobacteriota;c__Thermoleophilia;o__Solirubrobacterales;f__Solirubrobacteraceae;g__Palsa-465;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_009785395.1, s__Palsa-465 sp009785395, 95.0, 78.29, 0.28; GCA_017354065.1, s__Palsa-465 sp017354065, 95.0, 77.83, 0.19; GCA_017882465.1, s__Palsa-465 sp017882465, 95.0, 77.42, 0.25; GCA_019240015.1, s__Palsa-465 sp019240015, 95.0, 77.29, 0.16; GCA_019240115.1, s__Palsa-465 sp019240115, 95.0, 77.26, 0.27; GCA_017882405.1, s__Palsa-465 sp017882405, 95.0, 77.19, 0.25; GCA_019239375.1, s__Palsa-465 sp019239375, 95.0, 77.14, 0.18; GCA_019240305.1, s__Palsa-465 sp019240305, 95.0, 77.08, 0.15; GCA_019241635.1, s__Palsa-465 sp019241635, 95.0, 77.06, 0.2; GCA_003244035.1, s__Palsa-465 sp003244035, 95.0, 77.02, 0.19; GCA_019240655.1, s__Palsa-465 sp019240655, 95.0, 77.01, 0.23; GCA_005883415.1, s__Palsa-465 sp005883415, 95.0, 77.01, 0.13; GCA_019244395.1, s__Palsa-465 sp019244395, 95.0, 76.96, 0.2; GCA_019244745.1, s__Palsa-465 sp019244745, 95.0, 76.95, 0.19; GCA_019239445.1, s__Palsa-465 sp019239445, 95.0, 76.94, 0.2; GCA_019247075.1, s__Palsa-465 sp019247075, 95.0, 76.9, 0.16; GCA_003165655.1, s__Palsa-465 sp003165655, 95.0, 76.39, 0.15 68.98 11 0.9747606806435155 Genome not assigned to closest species as it falls outside its pre-defined ANI radius +bins.20 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__GJ-E10;s__ N/A N/A N/A N/A N/A GCA_903871435.1 95.0 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__GJ-E10;s__GJ-E10 sp903871435 77.81 0.28 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__GJ-E10;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_900290335.1, s__GJ-E10 sp900290335, 95.0, 78.27, 0.3; GCA_900290295.1, s__GJ-E10 sp900290295, 95.0, 78.16, 0.24; GCA_000828975.1, s__GJ-E10 sp000828975, 95.0, 78.02, 0.21 95.89 11 0.9552906587742479 N/A +bins.21 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Koribacteraceae;g__TOLSYN;s__ N/A N/A N/A N/A N/A N/A N/A N/A N/A N/A d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Koribacteraceae;g__TOLSYN;s__ ANI classification based on placement in class-level tree GCA_903832295.1, s__TOLSYN sp903832295, 95.0, 80.31, 0.46; GCA_903851035.1, s__TOLSYN sp903851035, 95.0, 78.62, 0.4; GCA_003010405.1, s__TOLSYN sp003010405, 95.0, 78.35, 0.28 71.43 11 0.9404257107269934 N/A +bins.22 d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__;s__ N/A N/A N/A N/A N/A N/A N/A N/A N/A N/A d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__;s__ taxonomic novelty determined using RED classification based on placement in class-level tree N/A 61.66 11 0.8379272061383479 N/A +bins.33 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__ATCC43930;f__Stellaceae;g__REEB95;s__ N/A N/A N/A N/A N/A GCA_018971215.1 95.0 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__ATCC43930;f__Stellaceae;g__REEB95;s__REEB95 sp018971215 79.29 0.5 d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__ATCC43930;f__Stellaceae;g__REEB95;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree N/A 57.61 11 0.9007596592919825 Genome has more than 12.5% of markers with multiple hits;Genome not assigned to closest species as it falls outside its pre-defined ANI radius +bins.37 d__Bacteria;p__Eremiobacterota;c__Eremiobacteria;o__Baltobacterales;f__Baltobacteraceae;g__JAFAMS01;s__ N/A N/A N/A N/A N/A GCA_019233165.1 95.0 d__Bacteria;p__Eremiobacterota;c__Eremiobacteria;o__Baltobacterales;f__Baltobacteraceae;g__JAFAMS01;s__JAFAMS01 sp019233165 77.35 0.24 d__Bacteria;p__Eremiobacterota;c__Eremiobacteria;o__Baltobacterales;f__Baltobacteraceae;g__JAFAMS01;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree N/A 73.23 11 0.9116654810893265 N/A +bins.38 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Acidoferrum;s__Acidoferrum sp903970165 GCA_903970165.1 95.0 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Acidoferrum;s__Acidoferrum sp903970165 99.79 0.95 GCA_903970165.1 95.0 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Acidoferrum;s__Acidoferrum sp903970165 99.79 0.95 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidoferrales;f__UBA7541;g__Acidoferrum;s__ taxonomic classification defined by topology and ANI topological placement and ANI have congruent species assignments GCA_013289955.1, s__Acidoferrum sp013289955, 95.0, 78.91, 0.48; GCA_013289825.1, s__Acidoferrum sp013289825, 95.0, 78.67, 0.38; GCA_003224075.1, s__Acidoferrum sp003224075, 95.0, 77.6, 0.13; GCA_013289835.1, s__Acidoferrum sp013289835, 95.0, 77.59, 0.21; GCA_013289585.1, s__Acidoferrum sp013289585, 95.0, 77.57, 0.21; GCA_003224525.1, s__Acidoferrum sp003224525, 95.0, 77.49, 0.19; GCA_003224055.1, s__Acidoferrum sp003224055, 95.0, 77.45, 0.21; GCA_018268785.1, s__Acidoferrum sp018268785, 95.0, 77.44, 0.12; GCA_003225315.1, s__Acidoferrum sp003225315, 95.0, 77.23, 0.2; GCA_003223295.1, s__Acidoferrum sp003223295, 95.0, 77.21, 0.17; GCA_003224085.1, s__Acidoferrum sp003224085, 95.0, 77.2, 0.15; GCA_003225295.1, s__Acidoferrum sp003225295, 95.0, 77.15, 0.17; GCA_003154775.1, s__Acidoferrum sp003154775, 95.0, 77.05, 0.12; GCA_002478115.1, s__Acidoferrum typicum, 95.0, 77.04, 0.11; GCA_013289855.1, s__Acidoferrum sp013289855, 95.0, 77.01, 0.16; GCA_003161195.1, s__Acidoferrum sp003161195, 95.0, 76.97, 0.14; GCA_013289665.1, s__Acidoferrum sp013289665, 95.0, 76.93, 0.12; GCA_001917435.1, s__Acidoferrum sp001917435, 95.0, 76.93, 0.17; GCA_003224145.1, s__Acidoferrum sp003224145, 95.0, 76.92, 0.13; GCA_001914785.1, s__Acidoferrum sp001914785, 95.0, 76.92, 0.18; GCA_003223255.1, s__Acidoferrum sp003223255, 95.0, 76.86, 0.15; GCA_001919715.1, s__Acidoferrum sp001919715, 95.0, 76.86, 0.09; GCA_003223245.1, s__Acidoferrum sp003223245, 95.0, 76.84, 0.14; GCA_003223215.1, s__Acidoferrum sp003223215, 95.0, 76.82, 0.13; GCA_003224105.1, s__Acidoferrum sp003224105, 95.0, 76.71, 0.12; GCA_019239815.1, s__Acidoferrum sp019239815, 95.0, 76.69, 0.11; GCA_003224135.1, s__Acidoferrum sp003224135, 95.0, 76.42, 0.07 46.54 11 N/A N/A +bins.40 d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__UBA11358;g__UBA11358;s__ N/A N/A N/A N/A N/A GCA_003133815.1 95.0 d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__UBA11358;g__UBA11358;s__UBA11358 sp003133815 81.93 0.55 d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__UBA11358;g__UBA11358;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_003455565.1, s__UBA11358 sp003455565, 95.0, 81.23, 0.48; GCA_003139955.1, s__UBA11358 sp003139955, 95.0, 81.19, 0.53; GCA_903944085.1, s__UBA11358 sp903944085, 95.0, 80.15, 0.57; GCA_903878805.1, s__UBA11358 sp903878805, 95.0, 80.14, 0.46; GCA_903884535.1, s__UBA11358 sp903884535, 95.0, 80.0, 0.52; GCA_903833055.1, s__UBA11358 sp903833055, 95.0, 79.76, 0.44; GCA_903918885.1, s__UBA11358 sp903918885, 95.0, 79.69, 0.46; GCA_903827955.1, s__UBA11358 sp903827955, 95.0, 79.41, 0.5; GCA_903850425.1, s__UBA11358 sp903850425, 95.0, 79.39, 0.51; GCA_903870815.1, s__UBA11358 sp903870815, 95.0, 79.25, 0.42; GCA_903889895.1, s__UBA11358 sp903889895, 95.0, 79.12, 0.37; GCA_903921455.1, s__UBA11358 sp903921455, 95.0, 79.08, 0.45; GCA_903861445.1, s__UBA11358 sp903861445, 95.0, 79.05, 0.38; GCA_903858275.1, s__UBA11358 sp903858275, 95.0, 79.01, 0.4; GCA_903922735.1, s__UBA11358 sp903922735, 95.0, 78.81, 0.41; GCA_003151855.1, s__UBA11358 sp003151855, 95.0, 78.75, 0.38; GCA_903822015.1, s__UBA11358 sp903822015, 95.0, 78.7, 0.35; GCA_903917705.1, s__UBA11358 sp903917705, 95.0, 78.47, 0.31; GCA_903847285.1, s__UBA11358 sp903847285, 95.0, 78.4, 0.35; GCA_903865745.1, s__UBA11358 sp903865745, 95.0, 78.29, 0.37; GCA_903835255.1, s__UBA11358 sp903835255, 95.0, 78.21, 0.34; GCA_903911825.1, s__UBA11358 sp903911825, 95.0, 78.0, 0.3; GCA_903842235.1, s__UBA11358 sp903842235, 95.0, 77.95, 0.34; GCA_903888765.1, s__UBA11358 sp903888765, 95.0, 77.34, 0.19; GCA_903820285.1, s__UBA11358 sp903820285, 95.0, 77.3, 0.25; GCA_903872345.1, s__UBA11358 sp903872345, 95.0, 77.16, 0.31; GCA_903895255.1, s__UBA11358 sp903895255, 95.0, 76.88, 0.23; GCA_903936645.1, s__UBA11358 sp903936645, 95.0, 76.55, 0.15; GCA_903875465.1, s__UBA11358 sp903875465, 95.0, 76.44, 0.16 84.85 11 0.9423589220805788 Genome not assigned to closest species as it falls outside its pre-defined ANI radius +bins.42 d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__UBA11358;g__UBA7542;s__ N/A N/A N/A N/A N/A N/A N/A N/A N/A N/A d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__UBA11358;g__UBA7542;s__ taxonomic novelty determined using RED classification based on placement in class-level tree N/A 44.1 11 0.9096785979623646 N/A +bins.43 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Steroidobacterales;f__Steroidobacteraceae;g__13-2-20CM-66-19;s__ N/A N/A N/A N/A N/A GCA_018241425.1 95.0 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Steroidobacterales;f__Steroidobacteraceae;g__13-2-20CM-66-19;s__13-2-20CM-66-19 sp018241425 82.58 0.73 d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Steroidobacterales;f__Steroidobacteraceae;g__13-2-20CM-66-19;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_018240525.1, s__13-2-20CM-66-19 sp018240525, 95.0, 81.57, 0.63; GCA_005878835.1, s__13-2-20CM-66-19 sp005878835, 95.0, 80.36, 0.5; GCA_005878095.1, s__13-2-20CM-66-19 sp005878095, 95.0, 80.28, 0.47; GCA_001914695.1, s__13-2-20CM-66-19 sp001914695, 95.0, 80.25, 0.43; GCA_019247195.1, s__13-2-20CM-66-19 sp019247195, 95.0, 80.2, 0.45; GCA_005877965.1, s__13-2-20CM-66-19 sp005877965, 95.0, 80.01, 0.41; GCA_019235245.1, s__13-2-20CM-66-19 sp019235245, 95.0, 79.85, 0.43; GCA_018241445.1, s__13-2-20CM-66-19 sp018241445, 95.0, 79.64, 0.45; GCA_019242385.1, s__13-2-20CM-66-19 sp019242385, 95.0, 79.44, 0.44; GCA_003136935.1, s__13-2-20CM-66-19 sp003136935, 95.0, 79.43, 0.43; GCA_003156695.1, s__13-2-20CM-66-19 sp003156695, 95.0, 78.98, 0.34; GCA_005877705.1, s__13-2-20CM-66-19 sp005877705, 95.0, 78.18, 0.26; GCA_018241505.1, s__13-2-20CM-66-19 sp018241505, 95.0, 77.02, 0.18 60.54 11 0.9759376136112458 Genome not assigned to closest species as it falls outside its pre-defined ANI radius +bins.7 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae;g__Terracidiphilus;s__ N/A N/A N/A N/A N/A GCA_003165005.1 95.0 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae;g__Terracidiphilus;s__Terracidiphilus sp003165005 80.68 0.48 d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae;g__Terracidiphilus;s__ taxonomic classification defined by topology and ANI classification based on placement in class-level tree GCA_003171315.1, s__Terracidiphilus sp003171315, 95.0, 78.84, 0.31; GCA_003138515.1, s__Terracidiphilus sp003138515, 95.0, 78.73, 0.38; GCF_900290245.1, s__Terracidiphilus gaucii, 95.0, 78.39, 0.36; GCA_003134815.1, s__Terracidiphilus sp003134815, 95.0, 78.09, 0.32; GCA_003139795.1, s__Terracidiphilus sp003139795, 95.0, 77.73, 0.23; GCA_003170825.1, s__Terracidiphilus sp003170825, 95.0, 77.65, 0.22; GCA_003134855.1, s__Terracidiphilus sp003134855, 95.0, 77.51, 0.2; GCA_003159355.1, s__Terracidiphilus sp003159355, 95.0, 77.48, 0.17; GCA_003142935.1, s__Terracidiphilus sp003142935, 95.0, 77.47, 0.23; GCA_002314435.1, s__Terracidiphilus sp002314435, 95.0, 77.46, 0.16; GCA_903849645.1, s__Terracidiphilus sp903849645, 95.0, 77.44, 0.21; GCA_015654835.1, s__Terracidiphilus sp015654835, 95.0, 77.44, 0.15; GCA_003165095.1, s__Terracidiphilus sp003165095, 95.0, 77.41, 0.22; GCA_000620725.1, s__Terracidiphilus sp000620725, 95.0, 77.34, 0.16; GCA_003138365.1, s__Terracidiphilus sp003138365, 95.0, 77.32, 0.22; GCA_018268915.1, s__Terracidiphilus sp018268915, 95.0, 77.31, 0.1; GCA_015655195.1, s__Terracidiphilus sp015655195, 95.0, 77.26, 0.14; GCA_003161045.1, s__Terracidiphilus sp003161045, 95.0, 77.26, 0.16; GCA_003162495.1, s__Terracidiphilus sp003162495, 95.0, 77.22, 0.12; GCA_903828675.1, s__Terracidiphilus sp903828675, 95.0, 77.16, 0.19; GCA_002307235.1, s__Terracidiphilus sp002307235, 95.0, 77.14, 0.15; GCA_903842065.1, s__Terracidiphilus sp903842065, 95.0, 77.12, 0.16; GCA_003133425.1, s__Terracidiphilus sp003133425, 95.0, 77.06, 0.21; GCA_903911925.1, s__Terracidiphilus sp903911925, 95.0, 77.05, 0.19; GCA_008682415.1, s__Terracidiphilus sp008682415, 95.0, 77.05, 0.15; GCA_003151435.1, s__Terracidiphilus sp003151435, 95.0, 77.04, 0.19; GCA_003165075.1, s__Terracidiphilus sp003165075, 95.0, 77.03, 0.18; GCF_003131205.1, s__Terracidiphilus savannae, 95.0, 76.98, 0.14; GCA_003165935.1, s__Terracidiphilus sp003165935, 95.0, 76.97, 0.12; GCA_003140705.1, s__Terracidiphilus sp003140705, 95.0, 76.93, 0.13; GCA_003142015.1, s__Terracidiphilus sp003142015, 95.0, 76.89, 0.17; GCA_015655365.1, s__Terracidiphilus sp015655365, 95.0, 76.86, 0.15; GCA_003156495.1, s__Terracidiphilus sp003156495, 95.0, 76.86, 0.15; GCA_003140785.1, s__Terracidiphilus sp003140785, 95.0, 76.86, 0.12; GCA_003167305.1, s__Terracidiphilus sp003167305, 95.0, 76.85, 0.17; GCA_003156235.1, s__Terracidiphilus sp003156235, 95.0, 76.83, 0.14; GCA_003151985.1, s__Terracidiphilus sp003151985, 95.0, 76.78, 0.12; GCF_001449115.1, s__Terracidiphilus gabretensis, 95.0, 76.73, 0.12; GCA_003166055.1, s__Terracidiphilus sp003166055, 95.0, 76.71, 0.17; GCA_017883405.1, s__Terracidiphilus sp017883405, 95.0, 76.68, 0.14; GCA_003164635.1, s__Terracidiphilus sp003164635, 95.0, 76.43, 0.12 43.96 11 0.9708336452632881 N/A diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf new file mode 100644 index 00000000..b58b2402 Binary files /dev/null and b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_heatmap.pdf differ diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip new file mode 100644 index 00000000..a5bcf346 Binary files /dev/null and b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_hqmq_bin.zip differ diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_ko_matrix.txt b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_ko_matrix.txt new file mode 100644 index 00000000..b58b2402 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_ko_matrix.txt @@ -0,0 +1 @@ +No KO analysis result for nmdc:wfmag-12-fxwdrv82.1 diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_kronaplot.html b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_kronaplot.html new file mode 100644 index 00000000..b58b2402 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_kronaplot.html @@ -0,0 +1 @@ +No KO analysis result for nmdc:wfmag-12-fxwdrv82.1 diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip new file mode 100644 index 00000000..2db16278 Binary files /dev/null and b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_lq_bin.zip differ diff --git a/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_mags_stats.json b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_mags_stats.json new file mode 100644 index 00000000..2a326515 --- /dev/null +++ b/tests/test_pscratch/nmdc_mags/nmdc_wfmag-12-fxwdrv82.1_mags_stats.json @@ -0,0 +1,314 @@ +{ + "input_contig_num": 2273412, + "too_short_contig_num": 2005162, + "low_depth_contig_num": 0, + "unbinned_contig_num": 241036, + "binned_contig_num": 27214, + "mags_list": [ + { + "bin_name": "bins.40", + "eukaryotic_evaluation": { + "completeness": 17.71, + "contamination": 8.82, + "ncbi_lineage_tax_ids": "1-131567-2759-2611352-33682-191814-2603949", + "ncbi_lineage": "root,cellular organisms,Eukaryota,Discoba,Euglenozoa,Diplonemea,Diplonemidae" + }, + "number_of_contig": 44, + "completeness": 97.3, + "contamination": 3.38, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "MQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "Bacteria", + "gtdbtk_phylum": "Verrucomicrobiota", + "gtdbtk_class": "Verrucomicrobiae", + "gtdbtk_order": "Pedosphaerales", + "gtdbtk_family": "UBA11358", + "gtdbtk_genus": "UBA11358", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7_c1", + "nmdc:wfmgas-13-56028x05.1_9_c1", + "nmdc:wfmgas-13-56028x05.1_16_c1", + "nmdc:wfmgas-13-56028x05.1_20_c1", + "nmdc:wfmgas-13-56028x05.1_23_c1", + "nmdc:wfmgas-13-56028x05.1_27_c1", + "nmdc:wfmgas-13-56028x05.1_45_c1", + "nmdc:wfmgas-13-56028x05.1_55_c1", + "nmdc:wfmgas-13-56028x05.1_71_c1", + "nmdc:wfmgas-13-56028x05.1_79_c1", + "nmdc:wfmgas-13-56028x05.1_99_c1", + "nmdc:wfmgas-13-56028x05.1_52_c2", + "nmdc:wfmgas-13-56028x05.1_127_c1", + "nmdc:wfmgas-13-56028x05.1_131_c1", + "nmdc:wfmgas-13-56028x05.1_137_c1", + "nmdc:wfmgas-13-56028x05.1_169_c1", + "nmdc:wfmgas-13-56028x05.1_200_c1", + "nmdc:wfmgas-13-56028x05.1_212_c1", + "nmdc:wfmgas-13-56028x05.1_223_c1", + "nmdc:wfmgas-13-56028x05.1_372_c1", + "nmdc:wfmgas-13-56028x05.1_393_c1", + "nmdc:wfmgas-13-56028x05.1_428_c1", + "nmdc:wfmgas-13-56028x05.1_52_c1", + "nmdc:wfmgas-13-56028x05.1_582_c1", + "nmdc:wfmgas-13-56028x05.1_706_c1", + "nmdc:wfmgas-13-56028x05.1_888_c1", + "nmdc:wfmgas-13-56028x05.1_912_c1", + "nmdc:wfmgas-13-56028x05.1_1268_c1", + "nmdc:wfmgas-13-56028x05.1_1271_c1", + "nmdc:wfmgas-13-56028x05.1_1492_c1", + "nmdc:wfmgas-13-56028x05.1_1494_c1", + "nmdc:wfmgas-13-56028x05.1_1604_c1", + "nmdc:wfmgas-13-56028x05.1_1627_c1", + "nmdc:wfmgas-13-56028x05.1_1888_c1", + "nmdc:wfmgas-13-56028x05.1_1938_c1", + "nmdc:wfmgas-13-56028x05.1_2944_c1", + "nmdc:wfmgas-13-56028x05.1_3261_c1", + "nmdc:wfmgas-13-56028x05.1_3477_c1", + "nmdc:wfmgas-13-56028x05.1_4194_c1", + "nmdc:wfmgas-13-56028x05.1_6257_c1", + "nmdc:wfmgas-13-56028x05.1_7589_c1", + "nmdc:wfmgas-13-56028x05.1_10469_c1", + "nmdc:wfmgas-13-56028x05.1_10553_c1", + "nmdc:wfmgas-13-56028x05.1_13792_c1" + ] + }, + { + "bin_name": "bins.20", + "eukaryotic_evaluation": { + "completeness": 17.71, + "contamination": 8.82, + "ncbi_lineage_tax_ids": "1-131567-2759-2611352-33682-191814-2603949", + "ncbi_lineage": "root,cellular organisms,Eukaryota,Discoba,Euglenozoa,Diplonemea,Diplonemidae" + }, + "number_of_contig": 85, + "completeness": 95.68, + "contamination": 0.0, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "MQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "Bacteria", + "gtdbtk_phylum": "Proteobacteria", + "gtdbtk_class": "Gammaproteobacteria", + "gtdbtk_order": "Burkholderiales", + "gtdbtk_family": "Burkholderiaceae", + "gtdbtk_genus": "GJ-E10", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_2_c1", + "nmdc:wfmgas-13-56028x05.1_8_c1", + "nmdc:wfmgas-13-56028x05.1_13_c1", + "nmdc:wfmgas-13-56028x05.1_19_c1", + "nmdc:wfmgas-13-56028x05.1_21_c1", + "nmdc:wfmgas-13-56028x05.1_26_c1", + "nmdc:wfmgas-13-56028x05.1_32_c1", + "nmdc:wfmgas-13-56028x05.1_37_c2", + "nmdc:wfmgas-13-56028x05.1_46_c1", + "nmdc:wfmgas-13-56028x05.1_47_c1", + "nmdc:wfmgas-13-56028x05.1_57_c1", + "nmdc:wfmgas-13-56028x05.1_76_c1", + "nmdc:wfmgas-13-56028x05.1_77_c1", + "nmdc:wfmgas-13-56028x05.1_84_c1", + "nmdc:wfmgas-13-56028x05.1_97_c1", + "nmdc:wfmgas-13-56028x05.1_117_c1", + "nmdc:wfmgas-13-56028x05.1_139_c1", + "nmdc:wfmgas-13-56028x05.1_141_c1", + "nmdc:wfmgas-13-56028x05.1_142_c1", + "nmdc:wfmgas-13-56028x05.1_160_c1", + "nmdc:wfmgas-13-56028x05.1_174_c1", + "nmdc:wfmgas-13-56028x05.1_182_c1", + "nmdc:wfmgas-13-56028x05.1_226_c1", + "nmdc:wfmgas-13-56028x05.1_289_c1", + "nmdc:wfmgas-13-56028x05.1_310_c1", + "nmdc:wfmgas-13-56028x05.1_329_c1", + "nmdc:wfmgas-13-56028x05.1_337_c1", + "nmdc:wfmgas-13-56028x05.1_373_c1", + "nmdc:wfmgas-13-56028x05.1_403_c1", + "nmdc:wfmgas-13-56028x05.1_410_c1", + "nmdc:wfmgas-13-56028x05.1_500_c1", + "nmdc:wfmgas-13-56028x05.1_540_c1", + "nmdc:wfmgas-13-56028x05.1_858_c1", + "nmdc:wfmgas-13-56028x05.1_1483_c1", + "nmdc:wfmgas-13-56028x05.1_1651_c1", + "nmdc:wfmgas-13-56028x05.1_1687_c1", + "nmdc:wfmgas-13-56028x05.1_1795_c1", + "nmdc:wfmgas-13-56028x05.1_1895_c1", + "nmdc:wfmgas-13-56028x05.1_2633_c1", + "nmdc:wfmgas-13-56028x05.1_2744_c1", + "nmdc:wfmgas-13-56028x05.1_3220_c1", + "nmdc:wfmgas-13-56028x05.1_3379_c1", + "nmdc:wfmgas-13-56028x05.1_3550_c1", + "nmdc:wfmgas-13-56028x05.1_3617_c1", + "nmdc:wfmgas-13-56028x05.1_3777_c1", + "nmdc:wfmgas-13-56028x05.1_1362_c2", + "nmdc:wfmgas-13-56028x05.1_3095_c2", + "nmdc:wfmgas-13-56028x05.1_5744_c1", + "nmdc:wfmgas-13-56028x05.1_5821_c1", + "nmdc:wfmgas-13-56028x05.1_6375_c1", + "nmdc:wfmgas-13-56028x05.1_6917_c1", + "nmdc:wfmgas-13-56028x05.1_7031_c1", + "nmdc:wfmgas-13-56028x05.1_7036_c1", + "nmdc:wfmgas-13-56028x05.1_7348_c1", + "nmdc:wfmgas-13-56028x05.1_9420_c1", + "nmdc:wfmgas-13-56028x05.1_9747_c1", + "nmdc:wfmgas-13-56028x05.1_1362_c3", + "nmdc:wfmgas-13-56028x05.1_9963_c1", + "nmdc:wfmgas-13-56028x05.1_10118_c1", + "nmdc:wfmgas-13-56028x05.1_10474_c1", + "nmdc:wfmgas-13-56028x05.1_10640_c1", + "nmdc:wfmgas-13-56028x05.1_10939_c1", + "nmdc:wfmgas-13-56028x05.1_11018_c1", + "nmdc:wfmgas-13-56028x05.1_11125_c1", + "nmdc:wfmgas-13-56028x05.1_11736_c1", + "nmdc:wfmgas-13-56028x05.1_13326_c1", + "nmdc:wfmgas-13-56028x05.1_13428_c1", + "nmdc:wfmgas-13-56028x05.1_13828_c1", + "nmdc:wfmgas-13-56028x05.1_37_c1", + "nmdc:wfmgas-13-56028x05.1_14715_c1", + "nmdc:wfmgas-13-56028x05.1_15174_c1", + "nmdc:wfmgas-13-56028x05.1_17767_c1", + "nmdc:wfmgas-13-56028x05.1_17928_c1", + "nmdc:wfmgas-13-56028x05.1_18037_c1", + "nmdc:wfmgas-13-56028x05.1_18887_c1", + "nmdc:wfmgas-13-56028x05.1_20885_c1", + "nmdc:wfmgas-13-56028x05.1_21369_c1", + "nmdc:wfmgas-13-56028x05.1_21783_c1", + "nmdc:wfmgas-13-56028x05.1_24945_c1", + "nmdc:wfmgas-13-56028x05.1_25148_c1", + "nmdc:wfmgas-13-56028x05.1_26641_c1", + "nmdc:wfmgas-13-56028x05.1_29321_c1", + "nmdc:wfmgas-13-56028x05.1_3095_c1", + "nmdc:wfmgas-13-56028x05.1_32796_c1", + "nmdc:wfmgas-13-56028x05.1_34090_c1" + ] + }, + { + "bin_name": "bins.9", + "eukaryotic_evaluation": { + "completeness": 17.71, + "contamination": 8.82, + "ncbi_lineage_tax_ids": "1-131567-2759-2611352-33682-191814-2603949", + "ncbi_lineage": "root,cellular organisms,Eukaryota,Discoba,Euglenozoa,Diplonemea,Diplonemidae" + }, + "number_of_contig": 92, + "completeness": 0.0, + "contamination": 0.0, + "total_bases": 0, + "gene_count": "null", + "bin_quality": "LQ", + "num_16s": 0, + "num_5s": 0, + "num_23s": 0, + "num_tRNA": 0, + "gtdbtk_domain": "null", + "gtdbtk_phylum": "null", + "gtdbtk_class": "null", + "gtdbtk_order": "null", + "gtdbtk_family": "null", + "gtdbtk_genus": "null", + "gtdbtk_species": "null", + "members_id": [ + "nmdc:wfmgas-13-56028x05.1_7094_c1", + "nmdc:wfmgas-13-56028x05.1_9486_c1", + "nmdc:wfmgas-13-56028x05.1_9853_c1", + "nmdc:wfmgas-13-56028x05.1_10857_c1", + "nmdc:wfmgas-13-56028x05.1_11702_c1", + "nmdc:wfmgas-13-56028x05.1_12042_c1", + "nmdc:wfmgas-13-56028x05.1_14174_c1", + "nmdc:wfmgas-13-56028x05.1_14597_c1", + "nmdc:wfmgas-13-56028x05.1_16115_c1", + "nmdc:wfmgas-13-56028x05.1_16261_c1", + "nmdc:wfmgas-13-56028x05.1_16795_c1", + "nmdc:wfmgas-13-56028x05.1_16943_c1", + "nmdc:wfmgas-13-56028x05.1_17208_c1", + "nmdc:wfmgas-13-56028x05.1_17245_c1", + "nmdc:wfmgas-13-56028x05.1_17383_c1", + "nmdc:wfmgas-13-56028x05.1_17783_c1", + "nmdc:wfmgas-13-56028x05.1_18468_c1", + "nmdc:wfmgas-13-56028x05.1_18553_c1", + "nmdc:wfmgas-13-56028x05.1_18858_c1", + "nmdc:wfmgas-13-56028x05.1_19302_c1", + "nmdc:wfmgas-13-56028x05.1_19824_c1", + "nmdc:wfmgas-13-56028x05.1_20316_c1", + "nmdc:wfmgas-13-56028x05.1_20787_c1", + "nmdc:wfmgas-13-56028x05.1_21029_c1", + "nmdc:wfmgas-13-56028x05.1_21435_c1", + "nmdc:wfmgas-13-56028x05.1_21475_c1", + "nmdc:wfmgas-13-56028x05.1_21484_c1", + "nmdc:wfmgas-13-56028x05.1_21518_c1", + "nmdc:wfmgas-13-56028x05.1_21685_c1", + "nmdc:wfmgas-13-56028x05.1_21809_c1", + "nmdc:wfmgas-13-56028x05.1_21924_c1", + "nmdc:wfmgas-13-56028x05.1_21958_c1", + "nmdc:wfmgas-13-56028x05.1_22186_c1", + "nmdc:wfmgas-13-56028x05.1_22271_c1", + "nmdc:wfmgas-13-56028x05.1_22516_c1", + "nmdc:wfmgas-13-56028x05.1_22514_c1", + "nmdc:wfmgas-13-56028x05.1_22777_c1", + "nmdc:wfmgas-13-56028x05.1_23003_c1", + "nmdc:wfmgas-13-56028x05.1_23115_c1", + "nmdc:wfmgas-13-56028x05.1_23204_c1", + "nmdc:wfmgas-13-56028x05.1_23239_c1", + "nmdc:wfmgas-13-56028x05.1_23352_c1", + "nmdc:wfmgas-13-56028x05.1_23445_c1", + "nmdc:wfmgas-13-56028x05.1_23505_c1", + "nmdc:wfmgas-13-56028x05.1_23571_c1", + "nmdc:wfmgas-13-56028x05.1_24047_c1", + "nmdc:wfmgas-13-56028x05.1_24749_c1", + "nmdc:wfmgas-13-56028x05.1_24981_c1", + "nmdc:wfmgas-13-56028x05.1_25059_c1", + "nmdc:wfmgas-13-56028x05.1_25526_c1", + "nmdc:wfmgas-13-56028x05.1_26162_c1", + "nmdc:wfmgas-13-56028x05.1_26376_c1", + "nmdc:wfmgas-13-56028x05.1_26773_c1", + "nmdc:wfmgas-13-56028x05.1_26816_c1", + "nmdc:wfmgas-13-56028x05.1_26891_c1", + "nmdc:wfmgas-13-56028x05.1_27179_c1", + "nmdc:wfmgas-13-56028x05.1_27272_c1", + "nmdc:wfmgas-13-56028x05.1_27358_c1", + "nmdc:wfmgas-13-56028x05.1_27411_c1", + "nmdc:wfmgas-13-56028x05.1_27550_c1", + "nmdc:wfmgas-13-56028x05.1_28892_c1", + "nmdc:wfmgas-13-56028x05.1_29003_c1", + "nmdc:wfmgas-13-56028x05.1_29238_c1", + "nmdc:wfmgas-13-56028x05.1_29324_c1", + "nmdc:wfmgas-13-56028x05.1_29771_c1", + "nmdc:wfmgas-13-56028x05.1_29878_c1", + "nmdc:wfmgas-13-56028x05.1_30248_c1", + "nmdc:wfmgas-13-56028x05.1_30476_c1", + "nmdc:wfmgas-13-56028x05.1_30587_c1", + "nmdc:wfmgas-13-56028x05.1_31160_c1", + "nmdc:wfmgas-13-56028x05.1_31834_c1", + "nmdc:wfmgas-13-56028x05.1_31922_c1", + "nmdc:wfmgas-13-56028x05.1_31971_c1", + "nmdc:wfmgas-13-56028x05.1_32244_c1", + "nmdc:wfmgas-13-56028x05.1_32605_c1", + "nmdc:wfmgas-13-56028x05.1_32623_c1", + "nmdc:wfmgas-13-56028x05.1_32832_c1", + "nmdc:wfmgas-13-56028x05.1_33068_c1", + "nmdc:wfmgas-13-56028x05.1_33334_c1", + "nmdc:wfmgas-13-56028x05.1_33438_c1", + "nmdc:wfmgas-13-56028x05.1_33855_c1", + "nmdc:wfmgas-13-56028x05.1_34035_c1", + "nmdc:wfmgas-13-56028x05.1_34120_c1", + "nmdc:wfmgas-13-56028x05.1_34140_c1", + "nmdc:wfmgas-13-56028x05.1_34133_c1", + "nmdc:wfmgas-13-56028x05.1_34177_c1", + "nmdc:wfmgas-13-56028x05.1_34481_c1", + "nmdc:wfmgas-13-56028x05.1_34728_c1", + "nmdc:wfmgas-13-56028x05.1_34843_c1", + "nmdc:wfmgas-13-56028x05.1_35665_c1", + "nmdc:wfmgas-13-56028x05.1_35772_c1", + "nmdc:wfmgas-13-56028x05.1_35995_c1" + ] + } + ] +} \ No newline at end of file diff --git a/tests/test_sched.py b/tests/test_sched.py index 6d25a253..6099999e 100644 --- a/tests/test_sched.py +++ b/tests/test_sched.py @@ -9,7 +9,7 @@ "workflows.yaml", "workflows-mt.yaml" ]) -def test_scheduler_cycle(test_db, mock_api, workflow_file, workflows_config_dir, site_config): +def test_scheduler_cycle(test_db, mock_api, workflow_file, workflows_config_dir, site_config_file): """ Test basic job creation. """ @@ -27,7 +27,7 @@ def test_scheduler_cycle(test_db, mock_api, workflow_file, workflows_config_dir, exp_num_jobs_initial = 1 exp_num_jobs_cycle_1 = 0 jm = Scheduler(test_db, wfn=workflows_config_dir / workflow_file, - site_conf=site_config) + site_conf=site_config_file) resp = jm.cycle() assert len(resp) == exp_num_jobs_initial assert resp[0]["config"]["git_repo"] in exp_rqc_git_repos @@ -40,7 +40,7 @@ def test_scheduler_cycle(test_db, mock_api, workflow_file, workflows_config_dir, "workflows.yaml", "workflows-mt.yaml" ]) -def test_progress(test_db, mock_api, workflow_file, workflows_config_dir, site_config): +def test_progress(test_db, mock_api, workflow_file, workflows_config_dir, site_config_file): reset_db(test_db) metatranscriptome = False if workflow_file == "workflows-mt.yaml": @@ -51,7 +51,7 @@ def test_progress(test_db, mock_api, workflow_file, workflows_config_dir, site_c jm = Scheduler(test_db, wfn=workflows_config_dir / workflow_file, - site_conf= site_config) + site_conf= site_config_file) workflow_by_name = dict() for wf in jm.workflows: workflow_by_name[wf.name] = wf @@ -119,7 +119,7 @@ def test_progress(test_db, mock_api, workflow_file, workflows_config_dir, site_c assert len(resp) == exp_num_post_annotation_jobs -def test_multiple_versions(test_db, mock_api, workflows_config_dir, site_config): +def test_multiple_versions(test_db, mock_api, workflows_config_dir, site_config_file): init_test(test_db) reset_db(test_db) test_db.jobs.delete_many({}) @@ -128,7 +128,7 @@ def test_multiple_versions(test_db, mock_api, workflows_config_dir, site_config) load_fixture(test_db, "data_generation_set.json") jm = Scheduler(test_db, wfn=workflows_config_dir / "workflows.yaml", - site_conf=site_config) + site_conf=site_config_file) workflow_by_name = dict() for wf in jm.workflows: workflow_by_name[wf.name] = wf @@ -154,14 +154,14 @@ def test_multiple_versions(test_db, mock_api, workflows_config_dir, site_config) assert len(resp) == 0 -def test_out_of_range(test_db, mock_api, workflows_config_dir, site_config): +def test_out_of_range(test_db, mock_api, workflows_config_dir, site_config_file): init_test(test_db) reset_db(test_db) test_db.jobs.delete_many({}) load_fixture(test_db, "data_object_set.json") load_fixture(test_db, "data_generation_set.json") jm = Scheduler(test_db, wfn=workflows_config_dir / "workflows.yaml", - site_conf=site_config) + site_conf=site_config_file) # Let's create two RQC records. One will be in range # and the other will not. We should only get new jobs # for the one in range. @@ -174,7 +174,7 @@ def test_out_of_range(test_db, mock_api, workflows_config_dir, site_config): resp = jm.cycle() assert len(resp) == 0 -def test_type_resolving(test_db, mock_api, workflows_config_dir, site_config): +def test_type_resolving(test_db, mock_api, workflows_config_dir, site_config_file): """ This tests the handling when the same type is used for different activity types. The desired behavior is to @@ -186,7 +186,7 @@ def test_type_resolving(test_db, mock_api, workflows_config_dir, site_config): load_fixture(test_db, "read_qc_analysis.json", col="workflow_execution_set") jm = Scheduler(test_db, wfn=workflows_config_dir / "workflows.yaml", - site_conf=site_config) + site_conf=site_config_file) workflow_by_name = dict() for wf in jm.workflows: workflow_by_name[wf.name] = wf @@ -205,7 +205,7 @@ def test_type_resolving(test_db, mock_api, workflows_config_dir, site_config): "workflows.yaml", "workflows-mt.yaml" ]) -def test_scheduler_add_job_rec(test_db, mock_api, workflow_file, workflows_config_dir, site_config): +def test_scheduler_add_job_rec(test_db, mock_api, workflow_file, workflows_config_dir, site_config_file): """ Test basic job creation. """ @@ -214,7 +214,7 @@ def test_scheduler_add_job_rec(test_db, mock_api, workflow_file, workflows_confi load_fixture(test_db, "data_generation_set.json") jm = Scheduler(test_db, wfn=workflows_config_dir / workflow_file, - site_conf=site_config) + site_conf=site_config_file) # sanity check assert jm diff --git a/tests/test_watch_nmdc.py b/tests/test_watch_nmdc.py index 9d0bca1d..2c790608 100644 --- a/tests/test_watch_nmdc.py +++ b/tests/test_watch_nmdc.py @@ -1,66 +1,309 @@ -from nmdc_automation.workflow_automation.watch_nmdc import Watcher -import os +import copy import json -import shutil +from pathlib import PosixPath, Path from pytest import fixture +import shutil +from unittest.mock import patch, PropertyMock, Mock +from nmdc_schema.nmdc import Database +from nmdc_automation.workflow_automation.watch_nmdc import ( + Watcher, + FileHandler, + JobManager +) +from nmdc_automation.workflow_automation.wfutils import WorkflowJob +from tests.fixtures import db_utils -@fixture(autouse=True) -def mock_cromwell(requests_mock, test_data_dir): - requests_mock.real_http = True - data = {"id": "1234"} - cromwell_url = "http://localhost:8088/api/workflows/v1" - requests_mock.post(cromwell_url, json=data) - afile_path = test_data_dir / "afile" - bfile_path = test_data_dir / "bfile" - metadata = {'outputs': { - "nmdc_rqcfilter.filtered_final": str(afile_path), - "nmdc_rqcfilter.filtered_stats_final": str(bfile_path), - "nmdc_rqcfilter.stats": { - "input_read_count": 11431762, - "input_read_bases": 1726196062, - "output_read_bases": 1244017053, - "output_read_count": 8312566 - }, - }} - requests_mock.get(f"{cromwell_url}/1234/metadata", json=metadata) - data = {"status": "Succeeded"} - requests_mock.get(f"{cromwell_url}/1234/status", json=data) - - -def test_watcher(site_config): - w = Watcher(site_config) - w.restore_from_checkpoint() - w.job_manager.job_checkpoint() - w.restore_from_checkpoint() - - -def test_claim_jobs(requests_mock, site_config, mock_api): - requests_mock.real_http = True - w = Watcher(site_config) - job_id = "nmdc:b7eb8cda-a6aa-11ed-b1cf-acde48001122" - resp = { - 'id': 'nmdc:1234', - 'detail': {'id': 'nmdc:1234'} - } - requests_mock.post(f"http://localhost/jobs/{job_id}:claim", json=resp) - w.claim_jobs() - w.cycle() - resp = w.job_manager.find_job_by_opid("nmdc:1234") - assert resp - - -def test_reclaim_job(requests_mock, site_config, mock_api): + +# FileHandler init tests +def test_file_handler_init_from_state_file(site_config, initial_state_file, tmp_path): + copy_state_file = tmp_path / "copy_state.json" + shutil.copy(initial_state_file, copy_state_file) + fh = FileHandler(site_config, initial_state_file) + assert fh + assert fh.state_file + assert isinstance(fh.state_file, PosixPath) + assert fh.state_file.exists() + assert fh.state_file.is_file() + # delete state file + fh.state_file = None + assert not fh.state_file + + # test setter + fh.state_file = initial_state_file + assert fh.state_file + assert fh.state_file.exists() + assert fh.state_file.is_file() + + # unlink state file + fh.state_file.unlink() + assert not fh.state_file.exists() + fh.state_file = copy_state_file + assert fh.state_file.exists() + assert fh.state_file.is_file() + + +def test_file_handler_init_from_config_agent_state(site_config, initial_state_file, tmp_path): + with patch("nmdc_automation.config.siteconfig.SiteConfig.agent_state", new_callable=PropertyMock) as mock_agent_state: + mock_agent_state.return_value = initial_state_file + fh = FileHandler(site_config) + assert fh + assert fh.state_file + assert fh.state_file.exists() + + +def test_file_handler_init_default_state(site_config): + # sanity check + assert site_config.agent_state is None + fh = FileHandler(site_config) + assert fh + assert fh.state_file + assert fh.state_file.exists() + # delete everything in the state file leaving an empty file + with open(fh.state_file, "w") as f: + f.write("") + assert fh.state_file.stat().st_size == 0 + + # create new FileHandler - should create new state file + fh2 = FileHandler(site_config) + assert fh2 + assert fh2.state_file + assert fh2.state_file.exists() + + +def test_file_handler_read_state(site_config, initial_state_file): + fh = FileHandler(site_config, initial_state_file) + state = fh.read_state() + assert state + assert isinstance(state, dict) + assert state.get("jobs") + assert isinstance(state.get("jobs"), list) + assert len(state.get("jobs")) == 1 + + +def test_file_handler_write_state(site_config, initial_state_file, fixtures_dir): + fh = FileHandler(site_config, initial_state_file) + state = fh.read_state() + assert state + # add new job + new_job = json.load(open(fixtures_dir / "new_state_job.json")) + assert new_job + state["jobs"].append(new_job) + fh.write_state(state) + # read state + new_state = fh.read_state() + assert new_state + assert isinstance(new_state, dict) + assert new_state.get("jobs") + assert isinstance(new_state.get("jobs"), list) + assert len(new_state.get("jobs")) == 2 + # reset state + fh.write_state(state) + + +def test_file_handler_get_output_path(site_config, initial_state_file, fixtures_dir): + # Arrange + was_informed_by = "nmdc:1234" + workflow_execution_id = "nmdc:56789" + mock_job = Mock() + mock_job.was_informed_by = was_informed_by + mock_job.workflow_execution_id = workflow_execution_id + + expected_output_path = site_config.data_dir / Path(was_informed_by) / Path(workflow_execution_id) + + fh = FileHandler(site_config, initial_state_file) + + # Act + output_path = fh.get_output_path(mock_job) + + # Assert + assert output_path + assert isinstance(output_path, PosixPath) + assert output_path == expected_output_path + + +def test_file_handler_write_metadata_if_not_exists(site_config, initial_state_file, fixtures_dir, tmp_path): + # Arrange + was_informed_by = "nmdc:1234" + workflow_execution_id = "nmdc:56789" + job_metadata = {"id": "xyz-123-456", "status": "Succeeded"} + mock_job = Mock() + mock_job.was_informed_by = was_informed_by + mock_job.workflow_execution_id = workflow_execution_id + mock_job.job.metadata = job_metadata + + + # patch config.data_dir + with patch("nmdc_automation.config.siteconfig.SiteConfig.data_dir", new_callable=PropertyMock) as mock_data_dir: + mock_data_dir.return_value = tmp_path + fh = FileHandler(site_config, initial_state_file) + + # Act + metadata_path = fh.write_metadata_if_not_exists(mock_job) + + # Assert + assert metadata_path + assert metadata_path.exists() + assert metadata_path.is_file() + + +# JobManager tests +def test_job_manager_init(site_config, initial_state_file): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + assert jm + assert jm.file_handler + assert jm.file_handler.state_file + + +def test_job_manager_restore_from_state(site_config, initial_state_file): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh, init_cache=False) + # Act + jm.restore_from_state() + # Assert + assert jm.job_cache + assert isinstance(jm.job_cache, list) + assert len(jm.job_cache) == 1 + assert isinstance(jm.job_cache[0], WorkflowJob) + + +def test_job_manager_job_checkpoint(site_config, initial_state_file): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + # Act + data = jm.job_checkpoint() + # Assert + assert data + assert isinstance(data, dict) + assert data.get("jobs") + assert isinstance(data.get("jobs"), list) + assert len(data.get("jobs")) == 1 + + +def test_job_manager_save_checkpoint(site_config, initial_state_file): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + # Act + jm.save_checkpoint() + # Assert + assert fh.state_file.exists() + assert fh.state_file.is_file() + + # cleanup + fh.state_file.unlink() + +def test_job_manager_find_job_by_opid(site_config, initial_state_file): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + # Act + job = jm.find_job_by_opid("nmdc:test-opid") + # Assert + assert job + assert isinstance(job, WorkflowJob) + assert job.opid == "nmdc:test-opid" + assert not job.done + + +def test_job_manager_prepare_and_cache_new_job(site_config, initial_state_file, fixtures_dir): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + new_job_state = json.load(open(fixtures_dir / "new_state_job.json")) + assert new_job_state + new_job = WorkflowJob(site_config, new_job_state) + # Act + opid = "nmdc:test-opid-2" + job = jm.prepare_and_cache_new_job(new_job, opid) + # Assert + assert job + assert isinstance(job, WorkflowJob) + assert job.opid == opid + assert not job.done + # cleanup + jm.job_cache = [] + + +def test_job_manager_get_finished_jobs(site_config, initial_state_file, fixtures_dir): + # Arrange - initial state has 1 failure and is not done + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + + # Add a job to the cache - mags is done and successful + new_job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + assert new_job_state + new_job = WorkflowJob(site_config, new_job_state) + jm.job_cache.append(new_job) + # sanity check + assert len(jm.job_cache) == 2 + + # add a failed job + failed_job_state = json.load(open(fixtures_dir / "failed_job_state.json")) + assert failed_job_state + failed_job = WorkflowJob(site_config, failed_job_state) + assert failed_job.job_status == "Failed" + jm.job_cache.append(failed_job) + # sanity check + assert len(jm.job_cache) == 3 + + # Act + successful_jobs, failed_jobs = jm.get_finished_jobs() + # Assert + assert successful_jobs + assert failed_jobs + # cleanup + jm.job_cache = [] + + +def test_job_manager_process_successful_job(site_config, initial_state_file, fixtures_dir): + # Arrange + fh = FileHandler(site_config, initial_state_file) + jm = JobManager(site_config, fh) + new_job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + assert new_job_state + new_job = WorkflowJob(site_config, new_job_state) + jm.job_cache.append(new_job) + # Act + db = jm.process_successful_job(new_job) + # Assert + assert db + assert isinstance(db, Database) + assert new_job.done + assert new_job.job_status == "Succeeded" + # cleanup + jm.job_cache = [] + + +@fixture +def mock_runtime_api_handler(site_config, mock_api): + pass + + +def test_claim_jobs(site_config_file, site_config, fixtures_dir): + # Arrange + with (patch("nmdc_automation.workflow_automation.watch_nmdc.RuntimeApiHandler.claim_job") as mock_claim_job): + mock_claim_job.return_value = {"id": "nmdc:1234", "detail": {"id": "nmdc:1234"}} + job_record = json.load(open(fixtures_dir / "mags_job_metadata.json")) + unclaimed_wfj = WorkflowJob(site_config, job_record) + w = Watcher(site_config_file) + w.claim_jobs(unclaimed_jobs=[unclaimed_wfj]) + + +def test_reclaim_job(requests_mock, site_config_file, mock_api): requests_mock.real_http = True - w = Watcher(site_config) + w = Watcher(site_config_file) job_id = "nmdc:b7eb8cda-a6aa-11ed-b1cf-acde48001122" - resp = { - 'id': 'nmdc:1234', - 'detail': {'id': 'nmdc:1234'} - } - requests_mock.post(f"http://localhost/jobs/{job_id}:claim", json=resp, - status_code=409) - w.claim_jobs() - resp = w.job_manager.find_job_by_opid("nmdc:1234") - assert resp + resp = {'id': 'nmdc:1234', 'detail': {'id': 'nmdc:1234'}} + requests_mock.post( + f"http://localhost/jobs/{job_id}:claim", json=resp, status_code=409 + ) # w.claim_jobs() # resp = w.job_manager.find_job_by_opid("nmdc:1234") # assert resp + + +def test_watcher_restore_from_checkpoint(site_config_file, fixtures_dir): + state_file = fixtures_dir / "mags_workflow_state.json" diff --git a/tests/test_wfutils.py b/tests/test_wfutils.py index eac03670..7e0ff4ea 100644 --- a/tests/test_wfutils.py +++ b/tests/test_wfutils.py @@ -1,47 +1,91 @@ -from nmdc_automation.workflow_automation.wfutils import WorkflowJob as job +from nmdc_automation.workflow_automation.wfutils import ( + CromwellRunner, + WorkflowJob, + WorkflowStateManager +) +from nmdc_automation.workflow_automation.models import DataObject, workflow_process_factory +from nmdc_schema.nmdc import MagsAnalysis, EukEval import json +def test_workflow_job(site_config, fixtures_dir): + workflow_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + job_metadata = json.load(open(fixtures_dir / "mags_job_metadata.json")) + + job = WorkflowJob(site_config, workflow_state, job_metadata) + assert job + assert job.workflow_execution_id == workflow_state['activity_id'] + + +def test_cromwell_job_runner(site_config, fixtures_dir): + # load cromwell metadata + job_metadata = json.load(open(fixtures_dir / "mags_job_metadata.json")) + job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + + + job_runner = CromwellRunner(site_config, job_state, job_metadata) + assert job_runner + + +def test_workflow_job_as_workflow_execution_dict(site_config, fixtures_dir): + workflow_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + job_metadata = json.load(open(fixtures_dir / "mags_job_metadata.json")) + + wfj = WorkflowJob(site_config, workflow_state, job_metadata) + + wfe_dict = wfj.as_workflow_execution_dict + assert wfe_dict + + +def test_state_manager(fixtures_dir): + mags_job_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + + state = WorkflowStateManager(mags_job_state) + assert state.workflow_execution_id == mags_job_state['activity_id'] + assert state.config == mags_job_state['conf'] + assert state.execution_template == mags_job_state['conf']['activity'] + assert state.was_informed_by == mags_job_state['conf']['was_informed_by'] + + +def test_workflow_job_data_objects_and_execution_record_mags(site_config, fixtures_dir, tmp_path): + # Note: test working dir must be the root of the project for this to work + job_metadata = json.load(open(fixtures_dir / "mags_job_metadata.json")) + workflow_state = json.load(open(fixtures_dir / "mags_workflow_state.json")) + job = WorkflowJob(site_config, workflow_state, job_metadata) + data_objects = job.make_data_objects(output_dir=tmp_path) + assert data_objects + for data_object in data_objects: + assert isinstance(data_object, DataObject) + wfe_dict = job.make_workflow_execution_record(data_objects) + wfe = workflow_process_factory(wfe_dict) + assert isinstance(wfe, MagsAnalysis) + # attributes from final_stats_json + assert wfe.mags_list + assert isinstance(wfe.mags_list, list) + # check for eukaryotic evaluation in each mag + for mag in wfe.mags_list: + assert mag.eukaryotic_evaluation + assert isinstance(mag.eukaryotic_evaluation, EukEval) + assert mag.eukaryotic_evaluation.completeness + assert mag.eukaryotic_evaluation.contamination + assert mag.eukaryotic_evaluation.ncbi_lineage + assert mag.eukaryotic_evaluation.ncbi_lineage + # check that the other final_stats props are there + assert isinstance(wfe.input_contig_num, int) + assert isinstance(wfe.too_short_contig_num, int) + assert isinstance(wfe.unbinned_contig_num, int) + assert isinstance(wfe.binned_contig_num, int) + + +def test_workflow_job_from_database_job_record(site_config, fixtures_dir): + job_rec = json.load(open(fixtures_dir / "unsubmitted_job_record.json")) + assert job_rec + job = WorkflowJob(site_config, job_rec) + assert job + assert job.workflow.nmdc_jobid == job_rec['id'] + + + + + -def test_job(job_config, requests_mock, test_data_dir): - requests_mock.real_http = True - data = {"id": "123"} - requests_mock.post("http://localhost:8088/api/workflows/v1", json=data) - rqcf = test_data_dir / "rqc_response.json" - rqc = json.load(open(rqcf)) - ajob = job(job_config, workflow_config=rqc['config']) - ajob.debug = True - ajob.dryrun = False - assert ajob.get_state() - ajob.cromwell_submit() - last = requests_mock.request_history[-1] - assert last.method == "POST" - assert last.url == "http://localhost:8088/api/workflows/v1" - - -def test_log(job_config): - ajob = job(job_config, workflow_config={}) - # ajob = job("example", "jobid", conf={}) - ajob.debug = True - ajob.json_log({"a": "b"}, title="Test") - - -def test_check_meta(job_config, requests_mock): - url = "http://localhost:8088/api/workflows/v1/1234/status" - requests_mock.get(url, json={"status": "Submitted"}) - url = "http://localhost:8088/api/workflows/v1/1234/metadata" - requests_mock.get(url, json={"status": "Submitted"}) - ajob = job(job_config, workflow_config={}) - ajob.jobid = "1234" - resp = ajob.check_status() - assert resp - resp = ajob.get_metadata() - assert resp - - -def test_set_state(job_config): - ajob = job(job_config, workflow_config={}) - state = ajob.get_state() - assert state - bjob = job(job_config, state=state) - assert bjob.activity_id == state['activity_id']