Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GeneMarker files as possible input #67

Merged
merged 17 commits into from
Jan 2, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions lusSTR/cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def main(args):
def edit_snp_config(config, args):
with open(config, "r") as file:
data = yaml.safe_load(file)
if args.straitrazor:
data["uas"] = False
if args.asoftware:
data["analysis_software"] = args.asoftware
if args.input:
data["samp_input"] = args.input
if args.out:
Expand Down Expand Up @@ -65,8 +65,8 @@ def edit_snp_config(config, args):
def edit_str_config(config, args):
with open(config, "r") as file:
data = yaml.safe_load(file)
if args.straitrazor:
data["uas"] = False
if args.asoftware:
data["analysis_software"] = args.asoftware
if args.powerseq:
data["kit"] = "powerseq"
if args.input:
Expand Down Expand Up @@ -102,8 +102,9 @@ def subparser(subparsers):
"-w", "--workdir", metavar="W", default=".",
help="directory to add config file; default is current working directory")
p.add_argument(
"--straitrazor", action="store_true",
help="Use if sequences have been previously run through STRait Razor."
"-a", "--analysis-software", choices=["uas", "straitrazor", "genemarker"], default="uas",
dest="asoftware", help="Analysis software program used prior to lusSTR. Choices are uas, "
"straitrazor or genemarker. Default is uas."
)
p.add_argument("--input", help="Input file or directory")
p.add_argument("--out", "-o", help="Output file/directory name")
Expand Down
4 changes: 2 additions & 2 deletions lusSTR/data/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
---

## general settings
uas: True ## True/False; if ran through UAS
sex: False ## True/False; include sex-chromosome STRs
analysis_software: uas #uas/straitrazor/genemarker; analysis software used prior to lusSTR
sex: False ## True/False; include sex-chromosome STRs
samp_input: "/path/to/input/directory/or/samples" ## input directory or sample; if not provided, will be cwd
output: "lusstr_output" ## output file/directory name; Example: "test_030923"

Expand Down
2 changes: 1 addition & 1 deletion lusSTR/data/snp_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
---

## general settings
uas: True ## True/False; if ran through UAS
analysis_software: "uas" ## uas/straitrazor; analysis software used
samp_input: "/path/to/input/directory/or/samples" ## input directory or sample; if not provided, will be cwd
output: "lusstr_output" ## output file/directory name; Example: "test_030923"
kit: "sigprep" ## sigprep/kintelligence
Expand Down
58 changes: 39 additions & 19 deletions lusSTR/scripts/marker.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,24 @@ class InvalidSequenceError(ValueError):
pass


class UnsupportedSoftwareError(ValueError):
pass


class STRMarker:
def __init__(self, locus, sequence, uas=False, kit="forenseq"):
def __init__(self, locus, sequence, software, kit="forenseq"):
self.locus = locus
self.sequence = sequence
if locus not in str_marker_data:
raise InvalidLocusError(locus)
self.data = str_marker_data[locus]
self.uas = uas
if software.lower() not in ("uas", "straitrazor", "genemarker"):
raise UnsupportedSoftwareError(software)
self.software = software
if kit.lower() not in ("forenseq", "powerseq"):
raise UnsupportedKitError(kit)
self.kit = kit.lower()
if uas and self.data["ReverseCompNeeded"] == "Yes":
if software == "uas" and self.data["ReverseCompNeeded"] == "Yes":
self.sequence = reverse_complement(sequence)

@property
Expand All @@ -69,12 +75,17 @@ def _uas_bases_to_trim(self):
function determines the number of bases that need to be trimmed from the full amplicon
sequence to recover the UAS core sequence.
"""
if self.uas:
if self.software == "uas":
return 0, 0
elif self.kit == "forenseq":
return self.data["Foren_5"], self.data["Foren_3"]
elif self.kit == "powerseq":
return self.data["Power_5"], self.data["Power_3"]
if self.locus == "D16S539" and self.software == "genemarker":
return self.data["Power_5"], (self.data["Power_3"] - 3)
elif self.locus == "D8S1179" and self.software == "genemarker":
return (self.data["Power_5"] - 5), (self.data["Power_3"] - 5)
else:
return self.data["Power_5"], self.data["Power_3"]
Comment on lines +83 to +88
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using the GeneMarker software, only two loci (D16 and D8) produced different sequences (STRait Razor contains a few additional bases). This accounts for these differences.

else:
raise UnsupportedKitError(self.kit)

Expand All @@ -86,7 +97,7 @@ def forward_sequence(self):
back to the UAS region. If the sequence has already been run through UAS, no trimming is
required.
"""
if self.uas:
if self.software == "uas":
return self.sequence
front, back = self._uas_bases_to_trim()
if back == 0:
Expand All @@ -107,7 +118,7 @@ def uas_sequence(self):

@property
def flankseq_5p(self):
if self.uas:
if self.software == "uas":
return None
front, back = self._uas_bases_to_trim()
if front == 0:
Expand All @@ -116,7 +127,7 @@ def flankseq_5p(self):

@property
def flank_5p(self):
if self.uas or self.flankseq_5p == "":
if self.software == "uas" or self.flankseq_5p == "":
return None
elif (
self.kit == "powerseq"
Expand All @@ -136,7 +147,7 @@ def flank_5p(self):

@property
def flankseq_3p(self):
if self.uas:
if self.software == "uas":
return None
front, back = self._uas_bases_to_trim()
if back == 0:
Expand All @@ -145,7 +156,7 @@ def flankseq_3p(self):

@property
def flank_3p(self):
if self.uas or self.flankseq_3p == "":
if self.software == "uas" or self.flankseq_3p == "":
return None
elif (
self.kit == "powerseq"
Expand Down Expand Up @@ -375,8 +386,17 @@ def convert(self):
if len(self.uas_sequence) < 110:
bracketed_form = collapse_repeats_by_length(self.uas_sequence, 4)
else:
for m in re.finditer("GGGCTGCCTA", self.uas_sequence):
break_point = m.end()
if "GGGCTGCCTA" in self.uas_sequence:
break_point = self.uas_sequence.index("GGGCTGCCTA") + 10
else:
break_point = self.uas_sequence.index("TTTT") + 14
# for m in re.finditer("GGGCTGCCTA", self.uas_sequence):
# break_point = m.end()
# try:
# break_point
# except NameError:
# for m in re.finditer("TTTT", self.uas_sequence):
# break_point = m.end() + 10
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry to be a stickler but this should be cleaned up before merging.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GAH yes

bracketed_form = (
f"{collapse_repeats_by_length(self.uas_sequence[:break_point], 4)} "
f"{collapse_repeats_by_length(self.uas_sequence[break_point:], 4)}"
Expand Down Expand Up @@ -1420,7 +1440,7 @@ def convert(self):
def canonical(self):
"""Canonical STR allele designation"""
n = self.repeat_size
if self.uas:
if self.software == "uas":
nsubout = self.data["BasesToSubtract"]
elif self.kit == "forenseq":
nsubout = self.data["BasesToSubtract"] - 12
Expand All @@ -1442,7 +1462,7 @@ class STRMarker_DYS390(STRMarker):
def canonical(self):
"""Canonical STR allele designation"""
n = self.repeat_size
if self.uas or self.kit == "powerseq":
if self.software == "uas" or self.kit == "powerseq":
nsubout = self.data["BasesToSubtract"]
else:
nsubout = self.data["BasesToSubtract"] - 3
Expand All @@ -1461,7 +1481,7 @@ def designation(self):
lus, sec, ter = None, None, None
lus = repeat_copy_number(self.convert, self.data["LUS"])
sec = repeat_copy_number(self.convert, self.data["Sec"])
if self.uas or self.kit == "powerseq":
if self.software == "uas" or self.kit == "powerseq":
ter = repeat_copy_number(self.convert, self.data["Tert"])
else:
if self.convert[-1] == "G":
Expand All @@ -1482,7 +1502,7 @@ class STRMarker_DYS385(STRMarker):
def canonical(self):
"""Canonical STR allele designation"""
n = self.repeat_size
if self.uas or self.kit == "forenseq":
if self.software == "uas" or self.kit == "forenseq":
nsubout = self.data["BasesToSubtract"]
else:
nsubout = self.data["BasesToSubtract"] - 4
Expand Down Expand Up @@ -1610,7 +1630,7 @@ def flank_5p(self):
return flank


def STRMarkerObject(locus, sequence, uas=False, kit="forenseq"):
def STRMarkerObject(locus, sequence, software, kit="forenseq"):
constructors = {
"D8S1179": STRMarker_D8S1179,
"D13S317": STRMarker_D13S317,
Expand Down Expand Up @@ -1660,6 +1680,6 @@ def STRMarkerObject(locus, sequence, uas=False, kit="forenseq"):
}
if locus in constructors:
constructor = constructors[locus]
return constructor(locus, sequence, uas=uas, kit=kit)
return constructor(locus, sequence, software=software, kit=kit)
else:
return STRMarker(locus, sequence, uas=uas, kit=kit)
return STRMarker(locus, sequence, software=software, kit=kit)
21 changes: 19 additions & 2 deletions lusSTR/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,15 @@ def test_format_straitrazor(input, testoutput, tmp_path):
exp_output = data_file(testoutput)
obs_output = str(tmp_path / "lusstr_output.csv")
str_path = str(tmp_path)
config_arglist = ["config", "--input", str(input_file), "-w", str_path, "--straitrazor"]
config_arglist = [
"config",
"--input",
str(input_file),
"-w",
str_path,
"--analysis-software",
"straitrazor",
]
lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(config_arglist))
format_arglist = ["strs", "format", "-w", str_path]
lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(format_arglist))
Expand All @@ -67,7 +75,16 @@ def test_format_sex_loci_straitrazor(tmp_path):
exp_output = data_file("testformat_sr_sexloci.csv")
obs_output = str(tmp_path / "lusstr_output_sexloci.csv")
str_path = str(tmp_path)
config_arglist = ["config", "--input", str(inputdb), "-w", str_path, "--straitrazor", "--sex"]
config_arglist = [
"config",
"--input",
str(inputdb),
"-w",
str_path,
"--analysis-software",
"straitrazor",
"--sex",
]
lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(config_arglist))
format_arglist = ["strs", "format", "-w", str_path]
lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(format_arglist))
Expand Down
Loading
Loading