Skip to content

Commit

Permalink
Minor edits
Browse files Browse the repository at this point in the history
  • Loading branch information
samriddhi99 committed Aug 21, 2023
1 parent 3cb6bc1 commit 1d6921d
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 23 deletions.
6 changes: 3 additions & 3 deletions mavedb_mapping/blat_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ def extract_blat_output(dat: dict):
def obtain_hit_starts(output, hit: int):
# a hit is a portion of similarity between query seq and matched seq
"""
Helper function to obtain HSP
Returns the start of hit
Helper function to obtain HSP.
Returns the starts of hit sequence.
"""
hit_starts = list()
for n in range(len(output[hit])):
Expand All @@ -66,7 +66,7 @@ def obtain_hit_starts(output, hit: int):

def obtain_hsp(output):
"""
Obtains high-scoring pairs (HSP) for query sequence
Obtains high-scoring pairs (HSP) for query sequence.
Parameters
----------
Expand Down
1 change: 1 addition & 0 deletions mavedb_mapping/get_allele.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
tr = Translator(data_proxy=dp, normalize=False)


# TODO: docstrings
def return_normalized_allele(allele, l):
if allele.state.sequence == "N" and l != "p":
allele.state.sequence = str(
Expand Down
27 changes: 19 additions & 8 deletions mavedb_mapping/output.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,36 @@
import json
# TODO: output for case where hgvs_nt column available


def output(dat, mappings, vrsmaps, blat_dict):
    """
    Assemble the output dictionary for a scoreset.

    Parameters
    ----------
    dat:
        Scoreset data, forwarded unchanged to the sequence builder.
    mappings:
        Transcript mappings. When truthy the sequence record is built with
        `protein_coding`; otherwise it is built with `non_coding`.
    vrsmaps:
        Indexable collection of VRS mapping results; only the first
        element is passed to `mapped_variants`.
    blat_dict:
        BLAT alignment data, forwarded unchanged to the sequence builder.

    Returns
    -------
    dict
        Dictionary with a "sequence" entry and a "mapping" entry.
    """
    if mappings:
        sequence = protein_coding(dat, mappings, blat_dict)
    else:
        sequence = non_coding(dat, blat_dict)

    # Return from a single exit point: previously only the protein-coding
    # branch returned the dictionary, so non-coding scoresets yielded None.
    return {
        "sequence": sequence,
        "mapping": mapped_variants(vrsmaps[0]),
    }


def coding(dat, mappings, blat_dict, relation="SO:is_homologous_to"):
def non_coding(dat, blat_dict, relation="SO:is_homologous_to"):
    """
    Build the sequence record for a scoreset without transcript mappings.

    Parameters
    ----------
    dat:
        Mapping that provides the "target_sequence" and "target_type" keys.
    blat_dict:
        Mapping that provides the "chrom" key.
    relation: str
        Value stored under "relation"; defaults to "SO:is_homologous_to".

    Returns
    -------
    dict
        Dictionary with the keys "target", "chrom", "target_type" and
        "relation".

    Raises
    ------
    KeyError
        If a required key is missing from `dat` or `blat_dict`.
    """
    return {
        "target": dat["target_sequence"],
        "chrom": blat_dict["chrom"],
        "target_type": dat["target_type"],
        "relation": relation,
    }


def protein_coding(dat, mappings, blat_dict, relation="SO:is_homologous_to"):
    """
    Build the sequence record for a scoreset with transcript mappings.

    Parameters
    ----------
    dat:
        Mapping that provides the "target_sequence" and "target_type" keys.
    mappings:
        Mapping that provides the "RefSeq_prot", "RefSeq_nuc" and "gsymb"
        keys.
    blat_dict:
        Mapping that provides the "chrom" key.
    relation: str
        Value stored under "relation"; defaults to "SO:is_homologous_to".

    Returns
    -------
    dict
        Dictionary with the keys "target", "RefSeq_prot", "RefSeq_nuc",
        "gsymb", "chrom", "target_type" and "relation".

    Raises
    ------
    KeyError
        If a required key is missing from any of the input mappings.
    """
    # "relation" appears exactly once: the stale duplicate entry (which also
    # lacked a trailing comma) made the literal a syntax error.
    return {
        "target": dat["target_sequence"],
        "RefSeq_prot": mappings["RefSeq_prot"],
        "RefSeq_nuc": mappings["RefSeq_nuc"],
        "gsymb": mappings["gsymb"],
        "chrom": blat_dict["chrom"],
        "target_type": dat["target_type"],
        "relation": relation,
    }

Expand All @@ -28,7 +39,7 @@ def mapped_variants(vrs):
premapped = vrs["pre_mapping"]
mapped = vrs["mapped"]
mapp = list()
for i in range(3):
for i in range(len(premapped)):
c = maps(premapped[i], mapped[i])
mapp.append(c)
return mapp
Expand All @@ -38,5 +49,5 @@ def maps(pre, post):
d = {}
d["pre_mapped"] = pre
d["mapped"] = post

return d
18 changes: 6 additions & 12 deletions mavedb_mapping/vrs_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,11 +239,7 @@ def vrs_mapping_for_protein_coding(
scores_list.append(sn)
accessions_list.append(accn)

vrs_mappings_dict = mappings_list
scores_dict_coding = scores_list
mavedb_ids_coding = accessions_list

return vrs_mappings_dict
return mappings_list


def check_for_transcripts(mappings_dict):
Expand Down Expand Up @@ -305,14 +301,12 @@ def vrs_mapping_for_non_coding(dat, mave_blat_dict, ref, ranges, hits, variant_d
scores_list.append(sn)
accessions_list.append(accn)

vrs_mappings_dict = mappings_list
scores_dict_coding = scores_list
mavedb_ids_coding = accessions_list

return vrs_mappings_dict
return mappings_list


def vrs_mapping(dat, mappings_dict, mave_blat_dict, variant_data):
def vrs_mapping(
dat: dict, mappings_dict: dict, mave_blat_dict: dict, variant_data: dict
) -> list:
"""
Perform VRS mapping for protein coding scoresets.
Expand All @@ -327,7 +321,7 @@ def vrs_mapping(dat, mappings_dict, mave_blat_dict, variant_data):
mave_blat_dict: dict
Dictionary containing data after doing BLAT Alignment
scores_csv: csv
variant_data: dict
Scores from MaveDB.
Returns
Expand Down

0 comments on commit 1d6921d

Please sign in to comment.