diff --git a/mavedb_mapping/main.py b/mavedb_mapping/main.py index c78e9ee..3d28c25 100644 --- a/mavedb_mapping/main.py +++ b/mavedb_mapping/main.py @@ -4,6 +4,7 @@ from mavedb_mapping.vrs_mapping import vrs_mapping from mavedb_mapping import data_file_path from mavedb_mapping.output import output +from mavedb_mapping.find_start_pos import if_start_not_first import json """Function to create a mapping given a scoreset and its scores""" @@ -18,6 +19,10 @@ def main_map(scoreset, scores_csv): # Mapping process blat_dict = mave_to_blat(dat) transcripts = main(blat_dict, dat) + # for seq where start pos is not first + c = if_start_not_first(dat, scores) + if c: + transcripts["start"] = c vrs = vrs_mapping(dat, transcripts, blat_dict, scores) out = output(dat, transcripts, vrs, blat_dict) return out diff --git a/mavedb_mapping/metadata_process.py b/mavedb_mapping/metadata_process.py index e9737d7..98831de 100644 --- a/mavedb_mapping/metadata_process.py +++ b/mavedb_mapping/metadata_process.py @@ -3,6 +3,7 @@ """Function that specifies input format""" + def metadata_obtain(scoreset_json, scores_csv) -> dict: """ Extract data from MaveDB scoresets and convert them into an input format that imitates @@ -13,14 +14,14 @@ def metadata_obtain(scoreset_json, scores_csv) -> dict: ---------- scoreset_json: json object Scoreset JSON object - + scores_csv Returns: ---------- dat: dict Dictionary containing extracted metadata - + variant_data: dict Dictionary containing variants @@ -41,7 +42,7 @@ def metadata_obtain(scoreset_json, scores_csv) -> dict: "target_sequence_type": target_type, "target_type": target, } - + vardat = pd.read_csv(scores_csv) varm = vardat["hgvs_pro"] @@ -49,7 +50,10 @@ def metadata_obtain(scoreset_json, scores_csv) -> dict: scores = vardat["score"].to_list() accessions = vardat["accession"].to_list() - variant_data = {"hgvs_pro": varm, "hgvs_nt":ntlist, "scores":scores, "accessions":accessions} + variant_data = { + "hgvs_pro": varm, + "hgvs_nt": ntlist, + 
"scores": scores, + "accessions": accessions, + } return dat, variant_data - -#TODO: change other things according to these changes \ No newline at end of file diff --git a/mavedb_mapping/output.py b/mavedb_mapping/output.py index 2f61364..f10b1c7 100644 --- a/mavedb_mapping/output.py +++ b/mavedb_mapping/output.py @@ -1,11 +1,31 @@ -# TODO: output for case where hgvs_nt column available +"""Produces the output with all data from the mapping""" -def output(dat, mappings, vrsmaps, blat_dict): +def output(dat: dict, mappings: dict, vrsmaps: dict, blat_dict: dict) -> dict: + """ + Produces final output in the form of a dictionary + + Parameters + ---------- + dat: dict + Dictionary containing data required for mapping. + + mappings: dict + Dictionary after transcript selection. + + vrsmaps: dict + Dictionary containing premapped and mapped variants + + blat_dict: dict + Dictionary containing data after doing BLAT Alignment + """ output_dict = {} if mappings: output_dict["sequence"] = protein_coding(dat, mappings, blat_dict) output_dict["mapping"] = mapped_variants(vrsmaps[0]) + if len(vrsmaps) > 1: + output_dict["mapping"] = mapped_variants(vrsmaps[1]) + return output_dict else: output_dict["sequence"] = non_coding(dat, blat_dict)