Skip to content

Commit

Permalink
Docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
samriddhi99 committed Aug 24, 2023
1 parent 9a32ac9 commit ebbcab8
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 8 deletions.
5 changes: 5 additions & 0 deletions mavedb_mapping/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from mavedb_mapping.vrs_mapping import vrs_mapping
from mavedb_mapping import data_file_path
from mavedb_mapping.output import output
from mavedb_mapping.find_start_pos import if_start_not_first
import json

"""Function to create a mapping given a scoreset and its scores"""
Expand All @@ -18,6 +19,10 @@ def main_map(scoreset, scores_csv):
# Mapping process
blat_dict = mave_to_blat(dat)
transcripts = main(blat_dict, dat)
# for seq where start pos is not first
c = if_start_not_first(dat, scores)
if c:
transcripts["start"] = c
vrs = vrs_mapping(dat, transcripts, blat_dict, scores)
out = output(dat, transcripts, vrs, blat_dict)
return out
16 changes: 10 additions & 6 deletions mavedb_mapping/metadata_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

"""Function that specifies input format"""


def metadata_obtain(scoreset_json, scores_csv) -> dict:
"""
Extract data from MaveDB scoresets and convert them into an input format that imitates
Expand All @@ -13,14 +14,14 @@ def metadata_obtain(scoreset_json, scores_csv) -> dict:
----------
scoreset_json: json object
Scoreset JSON object
scores_csv
Returns:
----------
dat: dict
Dictionary containing extracted metadata
variant_data: dict
Dictionary containing variants
Expand All @@ -41,15 +42,18 @@ def metadata_obtain(scoreset_json, scores_csv) -> dict:
"target_sequence_type": target_type,
"target_type": target,
}

vardat = pd.read_csv(scores_csv)

varm = vardat["hgvs_pro"]
ntlist = vardat["hgvs_nt"]
scores = vardat["score"].to_list()
accessions = vardat["accession"].to_list()

variant_data = {"hgvs_pro": varm, "hgvs_nt":ntlist, "scores":scores, "accessions":accessions}
variant_data = {
"hgvs_pro": varm,
"hgvs_nt": ntlist,
"scores": scores,
"accessions": accessions,
}
return dat, variant_data

#TODO: change other things according to these changes
24 changes: 22 additions & 2 deletions mavedb_mapping/output.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,31 @@
# TODO: output for case where hgvs_nt column available
"""Produces the output with all data from the mapping"""


def output(dat, mappings, vrsmaps, blat_dict):
def output(dat: dict, mappings: dict, vrsmaps: dict, blat_dict: dict) -> dict:
"""
Produces final output in the form of a dictionary
Parameters
----------
dat: dict
Dictionary containing data required for mapping.
mappings: dict
Dictionary after transcript selection.
vrsmaps: dict
Dictionary containing premapped and mapped variants
blat_dict: dict
Dictionary containing data after doing BLAT Alignment
"""
output_dict = {}
if mappings:
output_dict["sequence"] = protein_coding(dat, mappings, blat_dict)
output_dict["mapping"] = mapped_variants(vrsmaps[0])
if len(vrsmaps > 1):
output_dict["mapping"] = mapped_variants(vrsmaps[1])

return output_dict
else:
output_dict["sequence"] = non_coding(dat, blat_dict)
Expand Down

0 comments on commit ebbcab8

Please sign in to comment.