-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add GeneMarker files as possible input #67
Merged
Merged
Changes from 12 commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
9548b12
updated config [skip ci]
rnmitchell 926b7f8
added genemarker files to format wrapper script [skip ci]
rnmitchell 572033a
began changing convert script [skip ci]
rnmitchell c6e391b
updated convert workflow with genemarker [skip ci]
rnmitchell a82a01b
update marker.py [skip ci]
rnmitchell cbcac35
remove print statement [skip ci]
rnmitchell 44c6253
updated snp workflows to new config [skip ci]
rnmitchell b82420e
updated tests [skip ci]
rnmitchell 09443cf
fixed default config [skip ci]
rnmitchell ef47be6
cleaning up debugging statements [skip ci]
rnmitchell 77ea84b
merge master
rnmitchell 457b87a
added software check and updated DYS448 code
rnmitchell 57c0647
fixed marker.py
rnmitchell 5127e82
changed append to concat
rnmitchell 719d82c
added test for genemarker files
rnmitchell 5c40923
updated readme
rnmitchell a533aa2
cleaned up test
rnmitchell File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,18 +40,24 @@ class InvalidSequenceError(ValueError): | |
pass | ||
|
||
|
||
class UnsupportedSoftwareError(ValueError): | ||
pass | ||
|
||
|
||
class STRMarker: | ||
def __init__(self, locus, sequence, uas=False, kit="forenseq"): | ||
def __init__(self, locus, sequence, software, kit="forenseq"): | ||
self.locus = locus | ||
self.sequence = sequence | ||
if locus not in str_marker_data: | ||
raise InvalidLocusError(locus) | ||
self.data = str_marker_data[locus] | ||
self.uas = uas | ||
if software.lower() not in ("uas", "straitrazor", "genemarker"): | ||
raise UnsupportedSoftwareError(software) | ||
self.software = software | ||
if kit.lower() not in ("forenseq", "powerseq"): | ||
raise UnsupportedKitError(kit) | ||
self.kit = kit.lower() | ||
if uas and self.data["ReverseCompNeeded"] == "Yes": | ||
if software == "uas" and self.data["ReverseCompNeeded"] == "Yes": | ||
self.sequence = reverse_complement(sequence) | ||
|
||
@property | ||
|
@@ -69,12 +75,17 @@ def _uas_bases_to_trim(self): | |
function determines the number of bases that need to be trimmed from the full amplicon | ||
sequence to recover the UAS core sequence. | ||
""" | ||
if self.uas: | ||
if self.software == "uas": | ||
return 0, 0 | ||
elif self.kit == "forenseq": | ||
return self.data["Foren_5"], self.data["Foren_3"] | ||
elif self.kit == "powerseq": | ||
return self.data["Power_5"], self.data["Power_3"] | ||
if self.locus == "D16S539" and self.software == "genemarker": | ||
return self.data["Power_5"], (self.data["Power_3"] - 3) | ||
elif self.locus == "D8S1179" and self.software == "genemarker": | ||
return (self.data["Power_5"] - 5), (self.data["Power_3"] - 5) | ||
else: | ||
return self.data["Power_5"], self.data["Power_3"] | ||
else: | ||
raise UnsupportedKitError(self.kit) | ||
|
||
|
@@ -86,7 +97,7 @@ def forward_sequence(self): | |
back to the UAS region. If the sequence has already been run through UAS, no trimming is | ||
required. | ||
""" | ||
if self.uas: | ||
if self.software == "uas": | ||
return self.sequence | ||
front, back = self._uas_bases_to_trim() | ||
if back == 0: | ||
|
@@ -107,7 +118,7 @@ def uas_sequence(self): | |
|
||
@property | ||
def flankseq_5p(self): | ||
if self.uas: | ||
if self.software == "uas": | ||
return None | ||
front, back = self._uas_bases_to_trim() | ||
if front == 0: | ||
|
@@ -116,7 +127,7 @@ def flankseq_5p(self): | |
|
||
@property | ||
def flank_5p(self): | ||
if self.uas or self.flankseq_5p == "": | ||
if self.software == "uas" or self.flankseq_5p == "": | ||
return None | ||
elif ( | ||
self.kit == "powerseq" | ||
|
@@ -136,7 +147,7 @@ def flank_5p(self): | |
|
||
@property | ||
def flankseq_3p(self): | ||
if self.uas: | ||
if self.software == "uas": | ||
return None | ||
front, back = self._uas_bases_to_trim() | ||
if back == 0: | ||
|
@@ -145,7 +156,7 @@ def flankseq_3p(self): | |
|
||
@property | ||
def flank_3p(self): | ||
if self.uas or self.flankseq_3p == "": | ||
if self.software == "uas" or self.flankseq_3p == "": | ||
return None | ||
elif ( | ||
self.kit == "powerseq" | ||
|
@@ -375,8 +386,17 @@ def convert(self): | |
if len(self.uas_sequence) < 110: | ||
bracketed_form = collapse_repeats_by_length(self.uas_sequence, 4) | ||
else: | ||
for m in re.finditer("GGGCTGCCTA", self.uas_sequence): | ||
break_point = m.end() | ||
if "GGGCTGCCTA" in self.uas_sequence: | ||
break_point = self.uas_sequence.index("GGGCTGCCTA") + 10 | ||
else: | ||
break_point = self.uas_sequence.index("TTTT") + 14 | ||
# for m in re.finditer("GGGCTGCCTA", self.uas_sequence): | ||
# break_point = m.end() | ||
# try: | ||
# break_point | ||
# except NameError: | ||
# for m in re.finditer("TTTT", self.uas_sequence): | ||
# break_point = m.end() + 10 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry to be a stickler, but this should be cleaned up before merging. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. GAH yes |
||
bracketed_form = ( | ||
f"{collapse_repeats_by_length(self.uas_sequence[:break_point], 4)} " | ||
f"{collapse_repeats_by_length(self.uas_sequence[break_point:], 4)}" | ||
|
@@ -1420,7 +1440,7 @@ def convert(self): | |
def canonical(self): | ||
"""Canonical STR allele designation""" | ||
n = self.repeat_size | ||
if self.uas: | ||
if self.software == "uas": | ||
nsubout = self.data["BasesToSubtract"] | ||
elif self.kit == "forenseq": | ||
nsubout = self.data["BasesToSubtract"] - 12 | ||
|
@@ -1442,7 +1462,7 @@ class STRMarker_DYS390(STRMarker): | |
def canonical(self): | ||
"""Canonical STR allele designation""" | ||
n = self.repeat_size | ||
if self.uas or self.kit == "powerseq": | ||
if self.software == "uas" or self.kit == "powerseq": | ||
nsubout = self.data["BasesToSubtract"] | ||
else: | ||
nsubout = self.data["BasesToSubtract"] - 3 | ||
|
@@ -1461,7 +1481,7 @@ def designation(self): | |
lus, sec, ter = None, None, None | ||
lus = repeat_copy_number(self.convert, self.data["LUS"]) | ||
sec = repeat_copy_number(self.convert, self.data["Sec"]) | ||
if self.uas or self.kit == "powerseq": | ||
if self.software == "uas" or self.kit == "powerseq": | ||
ter = repeat_copy_number(self.convert, self.data["Tert"]) | ||
else: | ||
if self.convert[-1] == "G": | ||
|
@@ -1482,7 +1502,7 @@ class STRMarker_DYS385(STRMarker): | |
def canonical(self): | ||
"""Canonical STR allele designation""" | ||
n = self.repeat_size | ||
if self.uas or self.kit == "forenseq": | ||
if self.software == "uas" or self.kit == "forenseq": | ||
nsubout = self.data["BasesToSubtract"] | ||
else: | ||
nsubout = self.data["BasesToSubtract"] - 4 | ||
|
@@ -1610,7 +1630,7 @@ def flank_5p(self): | |
return flank | ||
|
||
|
||
def STRMarkerObject(locus, sequence, uas=False, kit="forenseq"): | ||
def STRMarkerObject(locus, sequence, software, kit="forenseq"): | ||
constructors = { | ||
"D8S1179": STRMarker_D8S1179, | ||
"D13S317": STRMarker_D13S317, | ||
|
@@ -1660,6 +1680,6 @@ def STRMarkerObject(locus, sequence, uas=False, kit="forenseq"): | |
} | ||
if locus in constructors: | ||
constructor = constructors[locus] | ||
return constructor(locus, sequence, uas=uas, kit=kit) | ||
return constructor(locus, sequence, software=software, kit=kit) | ||
else: | ||
return STRMarker(locus, sequence, uas=uas, kit=kit) | ||
return STRMarker(locus, sequence, software=software, kit=kit) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With the GeneMarker software, only two loci (D16S539 and D8S1179) produced sequences that differ from the STRait Razor output (the STRait Razor sequences contain a few additional bases). This change accounts for those differences.