From f7ebfa974f50c983649e2984244f5a21d9670359 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 16 Dec 2024 15:29:10 -0800 Subject: [PATCH 01/10] Fix #5 --- .../metadata_generator.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py index 67c654c..45af86d 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py @@ -308,6 +308,7 @@ def run(self): metab_analysis = self.generate_metabolomics_analysis( cluster_name=workflow_metadata_obj.execution_resource, raw_data_name=Path(workflow_metadata_obj.raw_data_file).name, + raw_data_id=raw_data_object.id, data_gen_id=mass_spec.id, processed_data_id="nmdc:placeholder", processing_institution=group_metadata_obj.processing_institution @@ -704,6 +705,7 @@ def generate_metabolomics_analysis( self, cluster_name: str, raw_data_name: str, + raw_data_id: str, data_gen_id: str, processed_data_id: str, processing_institution: str @@ -720,6 +722,8 @@ def generate_metabolomics_analysis( Name of the cluster or computing resource used for the analysis. raw_data_name : str Name of the raw data file that was analyzed. + raw_data_id : str + ID of the raw data object that was analyzed. data_gen_id : str ID of the DataGeneration object that generated the raw data. processed_data_id : str @@ -749,7 +753,7 @@ def generate_metabolomics_analysis( 'git_url': self.workflow_git_url, 'version': self.workflow_version, 'was_informed_by': data_gen_id, - 'has_input': [raw_data_name], + 'has_input': [raw_data_id], 'has_output': [processed_data_id], 'started_at_time': 'placeholder', 'ended_at_time': 'placeholder', From 13b9e6595f2f317fce50e2fea27eedf9084e9d1b Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 16 Dec 2024 15:33:35 -0800 Subject: [PATCH 02/10] Add versioning to MetabolomicsAnalysis id Leave a TODO note that this will have to be changed when we implement versioning for the workflow --- .../nmdc_lipidomics_metadata_generation/metadata_generator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py index 45af86d..c20b9c6 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py @@ -742,7 +742,8 @@ def generate_metabolomics_analysis( placeholder values and should be updated with actual timestamps later when the processed files are iterated over in the run method. """ - nmdc_id = self.mint_nmdc_id(nmdc_type=NmdcTypes.MetabolomicsAnalysis)[0] + nmdc_id = self.mint_nmdc_id(nmdc_type=NmdcTypes.MetabolomicsAnalysis)[0]+".1" + #TODO: Update the minting to handle versioning in the future data_dict = { 'id': nmdc_id, From 3103c67907ba08b37f2f1ca9dc1eb3552bf15419 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 16 Dec 2024 15:35:38 -0800 Subject: [PATCH 03/10] Modify MassSpectrometry description Fixes #7 --- .../nmdc_lipidomics_metadata_generation/metadata_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py index c20b9c6..5149f80 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py @@ -209,7 +209,7 @@ def __init__( 'processing institution' ] self.mass_spec_desc = ( - "Analysis of raw mass spectrometry data for the annotation of lipids." + "Generation of mass spectrometry data for the analysis of lipids." ) self.mass_spec_eluent_intro = "liquid_chromatography" self.analyte_category = "lipidome" From c9fd2eac5804e2ca34ed9edcb63e8e5df32754c1 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 16 Dec 2024 15:40:20 -0800 Subject: [PATCH 04/10] Remove generation of alternative ids from lipid metadata generation script --- .../metadata_generator.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py index 5149f80..50aafbd 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py @@ -57,8 +57,6 @@ class WorkflowMetadata: Directory containing processed data files. raw_data_file : str Path or name of the raw data file. - raw_data_object_alt_id : str - Alternative identifier for the raw data object. mass_spec_config_name : str Name of the mass spectrometry configuration used. lc_config_name : str @@ -74,7 +72,6 @@ class WorkflowMetadata: """ processed_data_dir: str raw_data_file: str - raw_data_object_alt_id: str mass_spec_config_name: str lc_config_name: str instrument_used: str @@ -302,7 +299,6 @@ def run(self): description=self.raw_data_obj_desc, base_url=self.raw_data_url, was_generated_by=mass_spec.id, - alternative_id=workflow_metadata_obj.raw_data_object_alt_id ) metab_analysis = self.generate_metabolomics_analysis( @@ -397,7 +393,7 @@ def load_metadata(self) -> pd.core.groupby.DataFrameGroupBy: FileNotFoundError If the `metadata_file` does not exist. ValueError - If values in columns 'Raw Data File', 'Raw Data Object Alt Id', + If values in columns 'Raw Data File', and 'Processed Data Directory' are not unique. Notes @@ -413,7 +409,6 @@ def load_metadata(self) -> pd.core.groupby.DataFrameGroupBy: # Check for uniqueness in specified columns columns_to_check = [ 'Raw Data File', - 'Raw Data Object Alt Id', 'Processed Data Directory' ] for column in columns_to_check: @@ -482,7 +477,6 @@ def create_workflow_metadata( return WorkflowMetadata( processed_data_dir=row['Processed Data Directory'], raw_data_file=row['Raw Data File'], - raw_data_object_alt_id=row['Raw Data Object Alt Id'], mass_spec_config_name=row['mass spec configuration name'], lc_config_name=row['lc config name'], instrument_used=row['instrument used'], From abce375d12f2df58aa060845b893d78245b4fdd6 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 16 Dec 2024 16:10:33 -0800 Subject: [PATCH 05/10] Add check to make sure biosamples exist for lipid metadata generation --- .../api_info_retriever.py | 39 +++++++++++++++++++ .../metadata_generator.py | 9 ++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py index 6bea42a..eb02920 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py @@ -73,3 +73,42 @@ def get_id_by_name_from_collection(self, name_field_value: str) -> str: raise requests.RequestException(f"Error making API request: {e}") except (KeyError, IndexError) as e: raise IndexError(f"No matching entry found for '{name_field_value}': {e}") + + def check_if_ids_exist(self, ids: list) -> bool: + """ + Check if the IDs exist in the collection. + + This method constructs a query to the API to filter the collection based on the given IDs, and checks if all IDs exist in the collection. + + Parameters + ---------- + ids : list + A list of IDs to check if they exist in the collection. + + Returns + ------- + bool + True if all IDs exist in the collection, False otherwise. + + Raises + ------ + requests.RequestException + If there's an error in making the API request. + """ + ids_test = list(set(ids)) + ids_test = [id.replace('"', "'") for id in ids_test] + ids_test_str = ", ".join(f'"{id}"' for id in ids_test) + match_id_field = "id" # Replace with the actual field name if different + filter_param = f'{{"{match_id_field}": {{"$in": [{ids_test_str}]}}}}' + og_url = f'https://api.microbiomedata.org/nmdcschema/{self.collection_name}?&filter={filter_param}&projection={match_id_field}' + + try: + resp = requests.get(og_url) + resp.raise_for_status() # Raises an HTTPError for bad responses + data = resp.json() + if not len(data['resources']) != len(ids_test): + return False + except requests.RequestException as e: + raise requests.RequestException(f"Error making API request: {e}") + + return True \ No newline at end of file diff --git a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py index 50aafbd..dc54184 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py @@ -381,7 +381,7 @@ def load_metadata(self) -> pd.core.groupby.DataFrameGroupBy: Load and group workflow metadata from a CSV file. This method reads the metadata CSV file, checks for uniqueness in - specified columns, and groups the data by biosample ID. + specified columns, checks that biosamples exist, and groups the data by biosample ID. Returns ------- @@ -414,6 +414,13 @@ def load_metadata(self) -> pd.core.groupby.DataFrameGroupBy: for column in columns_to_check: if not metadata_df[column].is_unique: raise ValueError(f"Duplicate values found in column '{column}'.") + + # Check that all biosamples exist + biosample_ids = metadata_df['Biosample Id'].unique() + api_biosample_getter = ApiInfoRetriever(collection_name="biosample_set") + + if not api_biosample_getter.check_if_ids_exist(biosample_ids): + raise ValueError("Biosample IDs do not exist in the collection.") # Group by Biosample grouped = metadata_df.groupby('Biosample Id') From 8ddb340a68e0efeeb363b62503b6d059bfae11ff Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 16 Dec 2024 16:29:03 -0800 Subject: [PATCH 06/10] Add validation step to lipid metadata generation --- .../api_info_retriever.py | 18 +++++------ .../metadata_generator.py | 31 ++++++++++++++++++- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py index eb02920..c92b70a 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py @@ -1,6 +1,7 @@ import requests from dataclasses import dataclass + class ApiInfoRetriever: """ A class to retrieve API information from a specified collection. @@ -61,13 +62,13 @@ def get_id_by_name_from_collection(self, name_field_value: str) -> str: filter_param = f'{{"name": "{name_field_value}"}}' field = "id" - og_url = f'https://api.microbiomedata.org/nmdcschema/{self.collection_name}?&filter={filter_param}&projection={field}' - + og_url = f"https://api.microbiomedata.org/nmdcschema/{self.collection_name}?&filter={filter_param}&projection={field}" + try: resp = requests.get(og_url) resp.raise_for_status() # Raises an HTTPError for bad responses data = resp.json() - identifier = data['resources'][0]['id'] + identifier = data["resources"][0]["id"] return identifier except requests.RequestException as e: raise requests.RequestException(f"Error making API request: {e}") @@ -98,17 +99,16 @@ def check_if_ids_exist(self, ids: list) -> bool: ids_test = list(set(ids)) ids_test = [id.replace('"', "'") for id in ids_test] ids_test_str = ", ".join(f'"{id}"' for id in ids_test) - match_id_field = "id" # Replace with the actual field name if different - filter_param = f'{{"{match_id_field}": {{"$in": [{ids_test_str}]}}}}' - og_url = f'https://api.microbiomedata.org/nmdcschema/{self.collection_name}?&filter={filter_param}&projection={match_id_field}' - + filter_param = f'{{"id": {{"$in": [{ids_test_str}]}}}}' + og_url = f"https://api.microbiomedata.org/nmdcschema/{self.collection_name}?&filter={filter_param}&projection=id" + try: resp = requests.get(og_url) resp.raise_for_status() # Raises an HTTPError for bad responses data = resp.json() - if not len(data['resources']) != len(ids_test): + if not len(data["resources"]) != len(ids_test): return False except requests.RequestException as e: raise requests.RequestException(f"Error making API request: {e}") - return True \ No newline at end of file + return True diff --git a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py index dc54184..52d6de6 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py @@ -374,6 +374,7 @@ def run(self): nmdc_database_inst.workflow_execution_set.append(metab_analysis) self.dump_nmdc_database(nmdc_database=nmdc_database_inst) + self.validate_json() logging.info("Metadata processing completed.") def load_metadata(self) -> pd.core.groupby.DataFrameGroupBy: @@ -848,4 +849,32 @@ def dump_nmdc_database(self, nmdc_database: nmdc.Database) -> None: logging.info( "Database successfully dumped in %s", self.database_dump_json_path - ) \ No newline at end of file + ) + + def validate_json(self) -> None: + """ + Validates the json file generated by the MetadataGenerator class. + + This method reads the JSON file generated by the MetadataGenerator class + and validates it against the NMDC schema. + + If the validation passes, the method returns without any side effects. + + Raises + ------ + Exception + If the validation fails. + """ + with open(self.database_dump_json_path, 'r') as f: + data = json.load(f) + + url = 'https://api-dev.microbiomedata.org/metadata/json:validate' + headers = { + 'accept': 'application/json', + 'Content-Type': 'application/json' + } + response = requests.post(url, headers=headers, json=data) + if response.status_code != 200: + logging.error(f"Request failed with status code {response.status_code}") + logging.error(response.text) + raise Exception("Validation failed") \ No newline at end of file From fd5d935cc5169f1a4531a8d6de58b0144b0eaa28 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 18 Dec 2024 14:53:24 -0800 Subject: [PATCH 07/10] Restructure API class --- .../api_info_retriever.py | 43 +++++++++++++++++-- .../metadata_generator.py | 30 +------------ 2 files changed, 40 insertions(+), 33 deletions(-) diff --git a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py index c92b70a..53fdb7b 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py @@ -1,8 +1,42 @@ import requests -from dataclasses import dataclass +import json +import logging +class NMDCAPIInterface: + """ + A genereric interface for the NMDC runtime API. + """ + + def __init__(self): + self.base_url = "https://api.microbiomedata.org" + + def validate_json(self) -> None: + """ + Validates a json file using the NMDC json validate endpoint. -class ApiInfoRetriever: + If the validation passes, the method returns without any side effects. + + Raises + ------ + Exception + If the validation fails. + """ + with open(self.database_dump_json_path, 'r') as f: + data = json.load(f) + + url = f"{self.base_url}/metadata/json:validate" + headers = { + 'accept': 'application/json', + 'Content-Type': 'application/json' + } + response = requests.post(url, headers=headers, json=data) + if response.status_code != 200: + logging.error(f"Request failed with status code {response.status_code}") + logging.error(response.text) + raise Exception("Validation failed") + + +class ApiInfoRetriever(NMDCAPIInterface): """ A class to retrieve API information from a specified collection. @@ -29,6 +63,7 @@ def __init__(self, collection_name: str): collection_name : str The name of the collection to be used for API queries. """ + super().__init__() self.collection_name = collection_name def get_id_by_name_from_collection(self, name_field_value: str) -> str: @@ -62,7 +97,7 @@ def get_id_by_name_from_collection(self, name_field_value: str) -> str: filter_param = f'{{"name": "{name_field_value}"}}' field = "id" - og_url = f"https://api.microbiomedata.org/nmdcschema/{self.collection_name}?&filter={filter_param}&projection={field}" + og_url = f"{self.base_url}/nmdcschema/{self.collection_name}?&filter={filter_param}&projection={field}" try: resp = requests.get(og_url) @@ -100,7 +135,7 @@ def check_if_ids_exist(self, ids: list) -> bool: ids_test = [id.replace('"', "'") for id in ids_test] ids_test_str = ", ".join(f'"{id}"' for id in ids_test) filter_param = f'{{"id": {{"$in": [{ids_test_str}]}}}}' - og_url = f"https://api.microbiomedata.org/nmdcschema/{self.collection_name}?&filter={filter_param}&projection=id" + og_url = f"{self.base_url}/nmdcschema/{self.collection_name}?&filter={filter_param}&projection=id" try: resp = requests.get(og_url) diff --git a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py index 52d6de6..6ea9927 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py @@ -849,32 +849,4 @@ def dump_nmdc_database(self, nmdc_database: nmdc.Database) -> None: logging.info( "Database successfully dumped in %s", self.database_dump_json_path - ) - - def validate_json(self) -> None: - """ - Validates the json file generated by the MetadataGenerator class. - - This method reads the JSON file generated by the MetadataGenerator class - and validates it against the NMDC schema. - - If the validation passes, the method returns without any side effects. - - Raises - ------ - Exception - If the validation fails. - """ - with open(self.database_dump_json_path, 'r') as f: - data = json.load(f) - - url = 'https://api-dev.microbiomedata.org/metadata/json:validate' - headers = { - 'accept': 'application/json', - 'Content-Type': 'application/json' - } - response = requests.post(url, headers=headers, json=data) - if response.status_code != 200: - logging.error(f"Request failed with status code {response.status_code}") - logging.error(response.text) - raise Exception("Validation failed") \ No newline at end of file + ) \ No newline at end of file From 01d9ef50894e7511f8931a71497c86d96fdd6559 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 18 Dec 2024 16:47:06 -0800 Subject: [PATCH 08/10] Move parameter file from output to input for workflow --- .../api_info_retriever.py | 17 ++++++++++++++--- .../metadata_generator.py | 18 ++++++++++++------ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py index 53fdb7b..b8345ed 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py @@ -10,18 +10,23 @@ class NMDCAPIInterface: def __init__(self): self.base_url = "https://api.microbiomedata.org" - def validate_json(self) -> None: + def validate_json(self, json_path) -> None: """ Validates a json file using the NMDC json validate endpoint. If the validation passes, the method returns without any side effects. + Parameters + ---------- + json_path : str + The path to the json file to be validated. + Raises ------ Exception If the validation fails. """ - with open(self.database_dump_json_path, 'r') as f: + with open(json_path, 'r') as f: data = json.load(f) url = f"{self.base_url}/metadata/json:validate" @@ -141,9 +146,15 @@ def check_if_ids_exist(self, ids: list) -> bool: resp = requests.get(og_url) resp.raise_for_status() # Raises an HTTPError for bad responses data = resp.json() - if not len(data["resources"]) != len(ids_test): + if len(data["resources"]) != len(ids_test): return False except requests.RequestException as e: raise requests.RequestException(f"Error making API request: {e}") return True + +biosample_ids = ["nmdc:bsm-11-q0bxzb10"] +api_biosample_getter = ApiInfoRetriever(collection_name="biosample_set") + +if not api_biosample_getter.check_if_ids_exist(biosample_ids): + raise ValueError("Biosample IDs do not exist in the collection.") diff --git a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py index 6ea9927..cd61b26 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/metadata_generator.py @@ -15,7 +15,7 @@ import nmdc_schema.nmdc as nmdc from linkml_runtime.dumpers import json_dumper -from api_info_retriever import ApiInfoRetriever +from api_info_retriever import ApiInfoRetriever, NMDCAPIInterface # Configure logging logging.basicConfig( @@ -307,6 +307,7 @@ def run(self): raw_data_id=raw_data_object.id, data_gen_id=mass_spec.id, processed_data_id="nmdc:placeholder", + parameter_data_id="nmdc:placeholder", processing_institution=group_metadata_obj.processing_institution ) @@ -329,7 +330,7 @@ def run(self): was_generated_by=metab_analysis.id ) nmdc_database_inst.data_object_set.append(processed_data_object) - processed_data.append(processed_data_object.id) + parameter_data_id = processed_data_object.id elif file_type == 'csv': processed_data_object = self.generate_data_object( @@ -366,6 +367,7 @@ def run(self): mass_spec_obj=mass_spec, analysis_obj=metab_analysis, raw_data_obj=raw_data_object, + parameter_data_id=parameter_data_id, processed_data_id_list=processed_data ) @@ -374,7 +376,8 @@ def run(self): nmdc_database_inst.workflow_execution_set.append(metab_analysis) self.dump_nmdc_database(nmdc_database=nmdc_database_inst) - self.validate_json() + api_interface = NMDCAPIInterface() + api_interface.validate_json(self.database_dump_json_path) logging.info("Metadata processing completed.") def load_metadata(self) -> pd.core.groupby.DataFrameGroupBy: @@ -593,8 +596,6 @@ def generate_mass_spectrometry( ----- This method uses the ApiInfoRetriever to fetch IDs for the instrument and configurations. It also mints a new NMDC ID for the DataGeneration object. - - TODO: Update docstring with new variables (e.g. analyte_category). """ nmdc_id = self.mint_nmdc_id(nmdc_type=NmdcTypes.MassSpectrometry)[0] @@ -710,6 +711,7 @@ def generate_metabolomics_analysis( raw_data_id: str, data_gen_id: str, processed_data_id: str, + parameter_data_id: str, processing_institution: str ) -> nmdc.MetabolomicsAnalysis: """ @@ -730,6 +732,8 @@ def generate_metabolomics_analysis( ID of the DataGeneration object that generated the raw data. processed_data_id : str ID of the processed data resulting from the analysis. + parameter_data_id : str + ID of the parameter data object used for the analysis. processing_institution : str Name of the institution where the analysis was performed. @@ -756,7 +760,7 @@ def generate_metabolomics_analysis( 'git_url': self.workflow_git_url, 'version': self.workflow_version, 'was_informed_by': data_gen_id, - 'has_input': [raw_data_id], + 'has_input': [raw_data_id, parameter_data_id], 'has_output': [processed_data_id], 'started_at_time': 'placeholder', 'ended_at_time': 'placeholder', @@ -772,6 +776,7 @@ def update_outputs( mass_spec_obj: object, analysis_obj: object, raw_data_obj: object, + parameter_data_id: str, processed_data_id_list: list ) -> None: """ @@ -805,6 +810,7 @@ def update_outputs( - Sets `analysis_obj.has_output` to `processed_data_id_list`. """ mass_spec_obj.has_output = [raw_data_obj.id] + analysis_obj.has_input[1] = parameter_data_id analysis_obj.has_output = processed_data_id_list def start_nmdc_database(self) -> nmdc.Database: From 38db3860182efe7a8e4cab885c226943109d0205 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 18 Dec 2024 16:50:43 -0800 Subject: [PATCH 09/10] Add docstring to NMDCAPIInterface class --- .../api_info_retriever.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py index b8345ed..3314af7 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py @@ -4,7 +4,7 @@ class NMDCAPIInterface: """ - A genereric interface for the NMDC runtime API. + A generic interface for the NMDC runtime API. """ def __init__(self): @@ -152,9 +152,3 @@ def check_if_ids_exist(self, ids: list) -> bool: raise requests.RequestException(f"Error making API request: {e}") return True - -biosample_ids = ["nmdc:bsm-11-q0bxzb10"] -api_biosample_getter = ApiInfoRetriever(collection_name="biosample_set") - -if not api_biosample_getter.check_if_ids_exist(biosample_ids): - raise ValueError("Biosample IDs do not exist in the collection.") From 95bc1561a565c992470a4d7ea4796afb886258f0 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 18 Dec 2024 16:53:04 -0800 Subject: [PATCH 10/10] Remove Alt ID column from example csv --- .../api_info_retriever.py | 10 ++++++++++ .../example_metadata_file.csv | 10 +++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py index 3314af7..3b4f862 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py +++ b/metaMS/nmdc_lipidomics_metadata_generation/api_info_retriever.py @@ -5,6 +5,16 @@ class NMDCAPIInterface: """ A generic interface for the NMDC runtime API. + + Attributes + ---------- + base_url : str + The base URL for the NMDC runtime API. + + Methods + ------- + validate_json(json_path: str) -> None: + Validates a json file using the NMDC json validate endpoint. """ def __init__(self): diff --git a/metaMS/nmdc_lipidomics_metadata_generation/example_metadata_file.csv b/metaMS/nmdc_lipidomics_metadata_generation/example_metadata_file.csv index e2524b8..07e2268 100644 --- a/metaMS/nmdc_lipidomics_metadata_generation/example_metadata_file.csv +++ b/metaMS/nmdc_lipidomics_metadata_generation/example_metadata_file.csv @@ -1,5 +1,5 @@ -Biosample Id,Associated Study,Processing Type,Raw Data File,Raw Data Object Alt Id,Processed Data Directory,mass spec configuration name,lc config name,instrument used,processing institution,instrument analysis start date,instrument analysis end date,execution resource -nmdc:biosamp:12-233,nmdc:sty-11-8fb6t785,MPLEX,/Users/zalm693/development/nmdc_ingest/example_lip_data/raw/raw_data_test1.raw,https://status.my.emsl.pnl.gov/view/1834807,/Users/zalm693/development/nmdc_ingest/example_lip_data/processed/sample1_ms1_processed,"EMSL lipidomics DDA mass spectrometry method, positive",EMSL metabolomics GC/MS mass spectrometry method,15T FT-ICR MS ,EMSL,01/22/2023,03/04/2024,EMSL-RZR -nmdc:biosamp:12-233,nmdc:sty-11-8fb6t785,MPLEX,/Users/zalm693/development/nmdc_ingest/example_lip_data/raw/raw_data_test2.raw,https://status.my.emsl.pnl.gov/view/1834808,/Users/zalm693/development/nmdc_ingest/example_lip_data/processed/sample1_ms2_processed,"EMSL lipidomics DDA mass spectrometry method, negative",EMSL metabolomics GC/MS mass spectrometry method,12T FT-ICR MS ,EMSL,01/22/2023,03/04/2024,EMSL-RZR -nmdc:biosamp:13-234,nmdc:sty-11-8fb6t785,MPLEX,/Users/zalm693/development/nmdc_ingest/example_lip_data/raw/raw_data_test3.raw,https://status.my.emsl.pnl.gov/view/1554801,/Users/zalm693/development/nmdc_ingest/example_lip_data/processed/sample2_ms1_processed,"EMSL lipidomics DDA mass spectrometry method, positive",EMSL metabolomics GC/MS mass spectrometry method,VOrbiETD04,EMSL,01/22/2023,03/04/2024,EMSL-RZR -nmdc:biosamp:13-234,nmdc:sty-11-8fb6t785,MPLEX,/Users/zalm693/development/nmdc_ingest/example_lip_data/raw/raw_data_test4.raw,https://status.my.emsl.pnl.gov/view/1554809,/Users/zalm693/development/nmdc_ingest/example_lip_data/processed/sample2_ms2_processed,"EMSL lipidomics DDA mass spectrometry method, negative",EMSL metabolomics GC/MS mass spectrometry method,Agilent GC-MS (2009),EMSL,01/22/2023,03/04/2024,EMSL-RZR \ No newline at end of file +Biosample Id,Associated Study,Processing Type,Raw Data File,Processed Data Directory,mass spec configuration name,lc config name,instrument used,processing institution,instrument analysis start date,instrument analysis end date,execution resource +nmdc:biosamp:12-233,nmdc:sty-11-8fb6t785,MPLEX,/Users/zalm693/development/nmdc_ingest/example_lip_data/raw/raw_data_test1.raw,/Users/zalm693/development/nmdc_ingest/example_lip_data/processed/sample1_ms1_processed,"EMSL lipidomics DDA mass spectrometry method, positive",EMSL metabolomics GC/MS mass spectrometry method,15T FT-ICR MS ,EMSL,1/22/23,3/4/24,EMSL-RZR +nmdc:biosamp:12-233,nmdc:sty-11-8fb6t785,MPLEX,/Users/zalm693/development/nmdc_ingest/example_lip_data/raw/raw_data_test2.raw,/Users/zalm693/development/nmdc_ingest/example_lip_data/processed/sample1_ms2_processed,"EMSL lipidomics DDA mass spectrometry method, negative",EMSL metabolomics GC/MS mass spectrometry method,12T FT-ICR MS ,EMSL,1/22/23,3/4/24,EMSL-RZR +nmdc:biosamp:13-234,nmdc:sty-11-8fb6t785,MPLEX,/Users/zalm693/development/nmdc_ingest/example_lip_data/raw/raw_data_test3.raw,/Users/zalm693/development/nmdc_ingest/example_lip_data/processed/sample2_ms1_processed,"EMSL lipidomics DDA mass spectrometry method, positive",EMSL metabolomics GC/MS mass spectrometry method,VOrbiETD04,EMSL,1/22/23,3/4/24,EMSL-RZR +nmdc:biosamp:13-234,nmdc:sty-11-8fb6t785,MPLEX,/Users/zalm693/development/nmdc_ingest/example_lip_data/raw/raw_data_test4.raw,/Users/zalm693/development/nmdc_ingest/example_lip_data/processed/sample2_ms2_processed,"EMSL lipidomics DDA mass spectrometry method, negative",EMSL metabolomics GC/MS mass spectrometry method,Agilent GC-MS (2009),EMSL,1/22/23,3/4/24,EMSL-RZR \ No newline at end of file