From dbceb5ae955d19d7d587c9765e06375457681e76 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Fri, 1 Dec 2023 14:34:54 -0500
Subject: [PATCH 1/6] added MPSproto as possible output file type [skip ci]

---
 lusSTR/cli/config.py      | 12 ++++++++----
 lusSTR/wrappers/filter.py | 21 +++++++++++++--------
 setup.py                  |  3 ++-
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/lusSTR/cli/config.py b/lusSTR/cli/config.py
index a972c847..ad51049f 100644
--- a/lusSTR/cli/config.py
+++ b/lusSTR/cli/config.py
@@ -89,8 +89,8 @@ def edit_str_config(config, args):
         data["profile_type"] = "reference"
     if args.datatype:
         data["data_type"] = args.datatype
-    if args.efm:
-        data["output_type"] = "efm"
+    if args.software:
+        data["output_type"] = args.software
     if args.strand:
         data["strand"] = args.strand
     return data
@@ -126,11 +126,15 @@ def subparser(subparsers):
         "--reference", action="store_true", 
         help="Use for creating Reference profiles for STR workflow"
     )
-    p.add_argument("--efm", action="store_true",help="Use to create EuroForMix profiles")
+    p.add_argument(
+        "--software", choices=["efm", "mpsproto", "strmix"], default="strmix",
+        help="Specify the probabilistic genotyping software package of choice. The final output"
+        " files will be in the correct format for direct use. Default is strmix."
+    )
     p.add_argument(
         "--str-type", choices=["ce", "ngs", "lusplus"], default="ngs",
         dest="datatype", help="Data type for STRs. Options are: CE allele ('ce'), sequence "
-        "('ngs'), or LUS+ allele ('lusplus'). Default is 'ngs'.",
+        "or bracketed sequence form ('ngs'), or LUS+ allele ('lusplus'). Default is 'ngs'.",
     )
     p.add_argument(
         "--noinfo", action="store_true", 
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 790b58f3..0ccc3c62 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -118,23 +118,25 @@ def process_strs(dict_loc, datatype, seq_col):
     return final_df, flags_df
 
 
-def EFM_output(profile, outfile, profile_type, data_type, separate=False):
+def EFM_output(profile, outfile, profile_type, data_type, col, separate=False):
     if profile_type == "reference":
         profile = profile[profile.allele_type == "real_allele"]
     else:
         profile = profile[profile.allele_type != "BelowAT"]
-    efm_profile = populate_efm_profile(profile, data_type)
+    efm_profile = populate_efm_profile(profile, data_type, col)
     if separate:
         write_sample_specific_efm_profiles(efm_profile, profile_type, data_type, outfile)
     else:
         write_aggregate_efm_profile(efm_profile, profile_type, data_type, outfile)
 
 
-def populate_efm_profile(profile, data_type):
+def populate_efm_profile(profile, data_type, colname):
     if data_type == "ce":
         prof_col = "CE_Allele"
     elif data_type == "lusplus":
         prof_col = "LUS_Plus"
+    elif data_type == "ngs":
+        prof_col = colname
     else:
         message = (
             f"Incorrect data type {data_type} specified for EFM. Please choose either "
@@ -328,7 +330,7 @@ def main(
         raise ValueError(f"unknown profile type '{profile_type}'")
     if data_type not in ("ce", "ngs", "lusplus"):
         raise ValueError(f"unknown data type '{data_type}'")
-    if output_type not in ("efm", "strmix"):
+    if output_type not in ("efm", "strmix", "mpsproto"):
         raise ValueError(f"unknown output type '{output_type}'")
     full_df = pd.read_csv(input, sep="\t")
     if output_dir is None:
@@ -336,17 +338,20 @@ def main(
     else:
         outpath = output_dir
     seq_col = "UAS_Output_Sequence" if strand == "uas" else "Forward_Strand_Sequence"
+    brack_col = (
+        "UAS_Output_Bracketed_Notation" if strand == "uas" else "Forward_Strand_Bracketed_Form"
+    )
     if nofilters:
         full_df["allele_type"] = "real_allele"
-        if output_type == "efm":
-            EFM_output(full_df, outpath, profile_type, data_type, separate)
+        if output_type == "efm" or output_type == "mpsproto":
+            EFM_output(full_df, outpath, profile_type, data_type, brack_col, separate)
         else:
             STRmix_output(full_df, outpath, profile_type, data_type, seq_col)
     else:
         dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
         final_df, flags_df = process_strs(dict_loc, data_type, seq_col)
-        if output_type == "efm":
-            EFM_output(final_df, outpath, profile_type, data_type, separate)
+        if output_type == "efm" or output_type == "mpsproto":
+            EFM_output(final_df, outpath, profile_type, data_type, brack_col, separate)
         else:
             STRmix_output(final_df, outpath, profile_type, data_type, seq_col)
         if info:
diff --git a/setup.py b/setup.py
index 4b5c79af..836d3836 100755
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,8 @@
             "lusSTR/tests/data/NGS_stutter_test/*",
             "lusSTR/tests/data/kinsnps/*",
             "lusSTR/tests/data/lusstr_output/*",
-            "lusSTR/tests/data/LUSPlus_stutter_test/*" "lusSTR/workflows/*",
+            "lusSTR/tests/data/LUSPlus_stutter_test/*",
+            "lusSTR/workflows/*",
             "lusSTR/wrappers/*",
         ]
     },

From c3113e0295e1ae03cbed394f0767d1e0d4368ccd Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 6 Dec 2023 09:14:08 -0500
Subject: [PATCH 2/6] updated snakemake file [skip ci]

---
 lusSTR/workflows/strs.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lusSTR/workflows/strs.smk b/lusSTR/workflows/strs.smk
index c9955579..86f7a526 100644
--- a/lusSTR/workflows/strs.smk
+++ b/lusSTR/workflows/strs.smk
@@ -19,7 +19,7 @@ separate = config["separate"]
 def get_sample_IDs(input, uas, output, software, separate):
     convert_out = f"{output}.txt"
     format_out = f"{output}.csv"
-    if software == "efm" and separate is False:
+    if (software == "efm" or software == "mpsproto") and separate is False:
         ID_list = os.path.basename(output)
     elif os.path.exists(convert_out):
         ID_list = get_existing_IDs(convert_out, "\t")

From 19ed704e7d931e179d6b247970a01f8238b2ae79 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 6 Dec 2023 09:30:29 -0500
Subject: [PATCH 3/6] added tests for MPSproto [skip ci]

---
 .../MPSProto_test/EFM_test_reference_ngs.csv  | 28 +++++++++++++++++++
 .../test_filtering_EFMoutput_ngs.csv          | 28 +++++++++++++++++++
 ...test_filtering_EFMoutput_sequence_info.csv | 26 +++++++++++++++++
 lusSTR/tests/test_filters.py                  | 24 ++++++++++++----
 4 files changed, 100 insertions(+), 6 deletions(-)
 create mode 100644 lusSTR/tests/data/MPSProto_test/EFM_test_reference_ngs.csv
 create mode 100644 lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_ngs.csv
 create mode 100644 lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_sequence_info.csv

diff --git a/lusSTR/tests/data/MPSProto_test/EFM_test_reference_ngs.csv b/lusSTR/tests/data/MPSProto_test/EFM_test_reference_ngs.csv
new file mode 100644
index 00000000..a5094a1b
--- /dev/null
+++ b/lusSTR/tests/data/MPSProto_test/EFM_test_reference_ngs.csv
@@ -0,0 +1,28 @@
+SampleName,Marker,Allele1,Allele2
+Positive_Control,CSF1PO,[AGAT]12,[AGAT]12
+Positive_Control,D10S1248,[GGAA]13,[GGAA]15
+Positive_Control,D12S391,[AGAT]11 [AGAC]6 AGAT,[AGAT]14 [AGAC]9
+Positive_Control,D13S317,[TATC]12 AATC [ATCT]3 TTCT GTCT GTC,[TATC]9 [AATC]2 [ATCT]3 TTCT GTCT GTC
+Positive_Control,D16S539,[GATA]13,[GATA]9
+Positive_Control,D17S1301,[AGAT]11,[AGAT]12
+Positive_Control,D18S51,[AGAA]16 AAAG AGAG AG,[AGAA]18 AAAG AGAG AG
+Positive_Control,D19S433,AAGG AAAG AAGG TAGG [AAGG]11 AGAG AGGA AGAA AGAG AG,AAGG AAAG AAGG TAGG [AAGG]12 AGAG AGGA AGAA AGAG AG
+Positive_Control,D1S1656,[TAGA]11 TAGG [TGTG]2 TG,[TAGA]13 [TGTG]2 TG
+Positive_Control,D20S482,[AGAT]14,[AGAT]15
+Positive_Control,D21S11,[TCTA]4 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCATA [TCTA]11,[TCTA]5 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCATA [TCTA]11 TA TCTA
+Positive_Control,D22S1045,[ATT]13 ACT [ATT]2,[ATT]13 ACT [ATT]2
+Positive_Control,D2S1338,[TGCC]7 [TTCC]12 GTCC [TTCC]2,[TGCC]7 [TTCC]15 GTCC [TTCC]2
+Positive_Control,D2S441,[TCTA]10,[TCTA]11 TTTA [TCTA]2
+Positive_Control,D3S1358,TCTA [TCTG]3 [TCTA]13,TCTA [TCTG]3 [TCTA]14
+Positive_Control,D4S2408,[ATCT]9,[ATCT]9
+Positive_Control,D5S818,[AGAT]12 AGAG,[AGAT]12 AGAG
+Positive_Control,D6S1043,[AGAT]12,[AGAT]14 ACAT [AGAT]5
+Positive_Control,D7S820,[GATA]11 GACA GATT GATA GTTT,[GATA]8 GACA GATT GATA GTTT
+Positive_Control,D8S1179,TCTA TCTG [TCTA]12,[TCTA]2 TCTG [TCTA]12
+Positive_Control,D9S1122,TAGA TCGA [TAGA]10,[TAGA]12
+Positive_Control,FGA,[TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2
+Positive_Control,PENTA D,AAAAG [AAAGA]12,AAAAG [AAAGA]13
+Positive_Control,PENTA E,[AAAGA]14,[AAAGA]7
+Positive_Control,TH01,[AATG]6,[AATG]6 ATG [AATG]3
+Positive_Control,TPOX,[AATG]11,[AATG]11
+Positive_Control,VWA,TCTA [TCTG]3 [TCTA]12 TCCA TCTA,TCTA [TCTG]4 [TCTA]14 TCCA TCTA
diff --git a/lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_ngs.csv b/lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_ngs.csv
new file mode 100644
index 00000000..ef06e8a2
--- /dev/null
+++ b/lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_ngs.csv
@@ -0,0 +1,28 @@
+SampleName,Marker,Allele1,Allele2,Allele3,Allele4,Height1,Height2,Height3,Height4
+Sample1,CSF1PO,,,,,,,,
+Sample1,D10S1248,,,,,,,,
+Sample1,D12S391,,,,,,,,
+Sample1,D13S317,,,,,,,,
+Sample1,D16S539,,,,,,,,
+Sample1,D17S1301,,,,,,,,
+Sample1,D18S51,,,,,,,,
+Sample1,D19S433,,,,,,,,
+Sample1,D1S1656,,,,,,,,
+Sample1,D20S482,,,,,,,,
+Sample1,D21S11,,,,,,,,
+Sample1,D22S1045,,,,,,,,
+Sample1,D2S1338,,,,,,,,
+Sample1,D2S441,,,,,,,,
+Sample1,D3S1358,,,,,,,,
+Sample1,D4S2408,[ATCT]10,[ATCT]8,[ATCT]9,,900,1000,1357,
+Sample1,D5S818,,,,,,,,
+Sample1,D6S1043,,,,,,,,
+Sample1,D7S820,,,,,,,,
+Sample1,D8S1179,TCTA TCTG [TCTA]11,[TCTA]2 TCTG [TCTA]10,[TCTA]2 TCTG [TCTA]11,[TCTA]2 TCTG [TCTA]9,95,89,739,26
+Sample1,D9S1122,TAGA TCGA [TAGA]10,TAGA TCGA [TAGA]11,[TAGA]10,[TAGA]11,108,948,87,991
+Sample1,FGA,[TTTC]3 TTTT TTCT [CTTT]10 CTCC [TTCC]2,[TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2,[TTTC]3 TTTT TTCT [CTTT]14 CTCC [TTCC]2,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,181,1750,262,1436
+Sample1,PENTA D,AAAAG [AAAGA]13,,,,1000,,,
+Sample1,PENTA E,[AAAGA]7,,,,505,,,
+Sample1,TH01,[AATG]6,[AATG]7,,,1632,2197,,
+Sample1,TPOX,,,,,,,,
+Sample1,VWA,,,,,,,,
diff --git a/lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_sequence_info.csv b/lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_sequence_info.csv
new file mode 100644
index 00000000..6d5ac112
--- /dev/null
+++ b/lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_sequence_info.csv
@@ -0,0 +1,26 @@
+SampleID,Locus,UAS_Output_Sequence,CE_Allele,UAS_Output_Bracketed_Notation,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter
+Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,10.0,[ATCT]10,900,real_allele,,,,,,
+Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,9.0,[ATCT]9,1357,real_allele,,,,,,
+Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCT,8.0,[ATCT]8,1000,real_allele,,,,,,
+Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,14.0,[TCTA]2 TCTG [TCTA]11,739,real_allele,,,,,,
+Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13.0,TCTA TCTG [TCTA]11,95,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.129
+Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13.0,[TCTA]2 TCTG [TCTA]10,89,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.12
+Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,12.0,[TCTA]2 TCTG [TCTA]9,26,-2_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.035
+Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,12.0,TCTA TCTG [TCTA]10,11,BelowAT,,,,,0.01,
+Sample1,D9S1122,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,13.0,TAGA TCGA [TAGA]11,948,real_allele,,,,,,
+Sample1,D9S1122,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,12.0,TAGA TCGA [TAGA]10,108,-1_stutter,TAGA TCGA [TAGA]11,,948.0,,,0.114
+Sample1,D9S1122,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,11.0,[TAGA]11,991,real_allele,,,,,,
+Sample1,D9S1122,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,10.0,[TAGA]10,87,-1_stutter,[TAGA]11,,991.0,,,0.088
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,23.0,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,1436,real_allele,,,,,,
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,22.0,[TTTC]3 TTTT TTCT [CTTT]14 CTCC [TTCC]2,262,-1_stutter,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,,1436.0,,,0.182
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,21.0,[TTTC]3 TTTT TTCT [CTTT]13 CTCC [TTCC]2,48,BelowAT,,,,,0.013,
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,20.0,[TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2,1750,real_allele,,,,,,
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,18.0,[TTTC]3 TTTT TTCT [CTTT]10 CTCC [TTCC]2,181,real_allele,,,,,,
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,17.0,[TTTC]3 TTTT TTCT [CTTT]9 CTCC [TTCC]2,15,BelowAT,,,,,0.004,
+Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,15.0,AAAAG [AAAGA]13,50,real_allele,,,,,,
+Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,13.0,AAAAG [AAAGA]13,1000,real_allele,,,,,,
+Sample1,PENTA E,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,7.0,[AAAGA]7,505,real_allele,,,,,,
+Sample1,TH01,AATGAATGAATGAATGAATGAATGAATG,7.0,[AATG]7,2197,real_allele,,,,,,
+Sample1,TH01,AATGAATGAATGAATGAATGAATG,6.0,[AATG]6,1632,real_allele,,,,,,
+Sample1,TH01,AATGAATGAATGAATGAATG,5.0,[AATG]5,66,BelowAT,,,,,0.017,
+Sample1,TPOX,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,11.0,[AATG]11,15,BelowAT,,,,,1.0,
diff --git a/lusSTR/tests/test_filters.py b/lusSTR/tests/test_filters.py
index d5a85681..4b547664 100644
--- a/lusSTR/tests/test_filters.py
+++ b/lusSTR/tests/test_filters.py
@@ -158,9 +158,14 @@ def test_plus1stutter(
 
 
 @pytest.mark.parametrize(
-    "outputdir, datatype", [("RU_stutter_test/", "ce"), ("LUSPlus_stutter_test/", "lusplus")]
+    "outputdir, datatype, software",
+    [
+        ("RU_stutter_test/", "ce", "efm"),
+        ("LUSPlus_stutter_test/", "lusplus", "efm"),
+        ("MPSProto_test/", "ngs", "mpsproto"),
+    ],
 )
-def test_EFMoutput_format(outputdir, datatype, tmp_path):
+def test_EFMoutput_format(outputdir, datatype, software, tmp_path):
     str_path = str(tmp_path / "WD")
     inputfile = data_file("test_stutter.txt")
     exp_out = data_file(f"{outputdir}test_filtering_EFMoutput_{datatype}.csv")
@@ -173,7 +178,8 @@ def test_EFMoutput_format(outputdir, datatype, tmp_path):
         str_path,
         "-o",
         "test_output",
-        "--efm",
+        "--software",
+        software,
         "--str-type",
         datatype,
         "--input",
@@ -246,9 +252,14 @@ def test_flags(tmp_path):
 
 
 @pytest.mark.parametrize(
-    "outputdir, datatype", [("RU_stutter_test/", "ce"), ("LUSPlus_stutter_test/", "lusplus")]
+    "outputdir, datatype, software",
+    [
+        ("RU_stutter_test/", "ce", "efm"),
+        ("LUSPlus_stutter_test/", "lusplus", "efm"),
+        ("MPSProto_test/", "ngs", "mpsproto"),
+    ],
 )
-def test_efm_reference(outputdir, datatype, tmp_path):
+def test_efm_reference(outputdir, datatype, software, tmp_path):
     str_path = str(tmp_path / "WD")
     inputfile = data_file("test_references.txt")
     exp_out = data_file(f"{outputdir}EFM_test_reference_{datatype}.csv")
@@ -259,7 +270,8 @@ def test_efm_reference(outputdir, datatype, tmp_path):
         str_path,
         "--input",
         "WD",
-        "--efm",
+        "--software",
+        software,
         "--reference",
         "--str-type",
         datatype,

From 0cccd3d282ff9f679706327a6c6fb3c4d8b893f5 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 6 Dec 2023 09:33:40 -0500
Subject: [PATCH 4/6] updated setup.py and manifest [skip ci]

---
 MANIFEST.in | 1 +
 setup.py    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index 21e04336..87da302c 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -12,3 +12,4 @@ include lusSTR/tests/data/NGS_stutter_test/*
 include lusSTR/tests/data/kinsnps/*
 include lusSTR/tests/data/lusstr_output/*
 include lusSTR/tests/data/LUSPlus_stutter_test/*
+include lusSTR/tests/data/MPSProto_test/*
diff --git a/setup.py b/setup.py
index 836d3836..43c6f4da 100755
--- a/setup.py
+++ b/setup.py
@@ -33,6 +33,7 @@
             "lusSTR/tests/data/kinsnps/*",
             "lusSTR/tests/data/lusstr_output/*",
             "lusSTR/tests/data/LUSPlus_stutter_test/*",
+            "lusSTR/tests/data/MPSProto_test/*",
             "lusSTR/workflows/*",
             "lusSTR/wrappers/*",
         ]

From 2e70d0097450e9c501a2103dddee47abe40bc121 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 6 Dec 2023 09:56:04 -0500
Subject: [PATCH 5/6] updated tests and README

---
 README.md                                              | 10 +++++-----
 .../EFM_test_reference_ngs.csv                         |  0
 .../test_filtering_EFMoutput_ngs.csv                   |  0
 .../test_filtering_EFMoutput_sequence_info.csv         |  0
 lusSTR/tests/test_filters.py                           |  7 ++++---
 5 files changed, 9 insertions(+), 8 deletions(-)
 rename lusSTR/tests/data/{MPSProto_test => MPSproto_test}/EFM_test_reference_ngs.csv (100%)
 rename lusSTR/tests/data/{MPSProto_test => MPSproto_test}/test_filtering_EFMoutput_ngs.csv (100%)
 rename lusSTR/tests/data/{MPSProto_test => MPSproto_test}/test_filtering_EFMoutput_sequence_info.csv (100%)

diff --git a/README.md b/README.md
index eed6814e..0d1c9e82 100755
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 lusSTR is a tool written in Python to convert NGS sequence data of forensic STR loci to different sequence representations (sequence bracketed form) and allele designations (CE allele, LUS/LUS+ alleles) for ease in downstream analyses. See the below section ```Converting STR sequences to other sequence representations and allele designations``` for more information. 
 
-Further, lusSTR can perform filtering and stutter identification using the CE allele, the LUS+ allele, or the bracketed sequence form for autosomal loci and create files for direct input into two probabilistic genotyping software packages, EuroForMix (CE and LUS+) and STRmix (CE and NGS). 
+Further, lusSTR can perform filtering and stutter identification using the CE allele, the LUS+ allele, or the bracketed sequence form for autosomal loci and create files for direct input into three probabilistic genotyping software packages, EuroForMix (CE and LUS+), MPSproto (NGS), and STRmix (CE and NGS). 
 
 lusSTR also processes SNP data from the Verogen ForenSeq and Kintelligence panels and create evidence and/or reference files for use in EFM. See the below section ```SNP Data Processing``` for more information.
 
@@ -65,12 +65,12 @@ kit: ```forenseq``` (forenseq/powerseq) (invoke the ```--powerseq``` flag if usi
 nocombine: ```False``` (True/False); do not combine identical sequences during the ```convert``` step, if using STRait Razor data. (invoke the ```--nocombine``` flag)  
 
 ### filter settings  
-output_type: ```strmix``` (strmix/efm) (invoke ```--efm``` flag if creating output for EuroForMix)  
+output_type: ```strmix``` (strmix/efm/mpsproto) (indicate using the ```--software``` flag)  
 profile_type: ```evidence``` (evidence/reference) (invoke ```--reference``` flag if creating a reference output file)  
 data_type: ```ngs``` (ce/ngs/lusplus) (indicate using the ```--str-type```)  
 info: ```True``` (True/False); create allele information file (invoke ```--noinfo``` flag to not create the allele information file)  
-separate: ```False``` (True/False); for EFM only, if True will create individual files for samples; if False, will create one file with all samples (invoke ```--separate``` flag to separate EFM output files)  
-nofilters: ```False``` (True/False); skip all filtering steps but still creates EFM/STRmix output files (invoke ```--nofilters``` flag)  
+separate: ```False``` (True/False); for EFM/MPSproto only, if True will create individual files for samples; if False, will create one file with all samples (invoke ```--separate``` flag to separate EFM/MPSproto output files)  
+nofilters: ```False``` (True/False); skip all filtering steps but still creates EFM/MPSproto/STRmix output files (invoke ```--nofilters``` flag)  
 strand: ```uas``` (uas/forward); indicates the strand orientation in which to report the sequence in the final output table for STRmix NGS only (indicate using ```--strand```)
 
 One additional argument can be provided with ```lusstr config```:  
@@ -189,7 +189,7 @@ Each locus is checked for containing greater than 2 alleles (indicating a potent
 
 When using STRmix data, the data type can be specified using the ```data-type``` setting as either ```ce```, ```ngs``` or ```lusplus``` (default is ```ngs```). If ```ngs``` or ```lusplus``` is specified, the same size filter is applied following the stutter filter. Further, the columns and column names in the output file differ based on the data type.
 
-Finally, output files are created for direct use in EuroForMix (EFM) or STRmix. If EFM is specified, a single file is created containing all samples in the input file (however, separate output files for each sample can be created with the ```separate``` setting specified in the config file). If STRmix is specified, a directory containing files for each individual sample is created. The ```profile-type``` setting allows for the creation of either a ```reference``` or ```evidence``` profile. Both EuroForMix and STRmix require different formatting depending on the type of sample. 
+Finally, output files are created for direct use in EuroForMix (EFM), MPSproto, or STRmix. If EFM or MPSproto is specified, a single file is created containing all samples in the input file (however, separate output files for each sample can be created with the ```separate``` setting specified in the config file). If STRmix is specified, a directory containing files for each individual sample is created. The ```profile-type``` setting allows for the creation of either a ```reference``` or ```evidence``` profile. EuroForMix/MPSproto and STRmix each require different formatting depending on the profile type. 
 
 ___
 
diff --git a/lusSTR/tests/data/MPSProto_test/EFM_test_reference_ngs.csv b/lusSTR/tests/data/MPSproto_test/EFM_test_reference_ngs.csv
similarity index 100%
rename from lusSTR/tests/data/MPSProto_test/EFM_test_reference_ngs.csv
rename to lusSTR/tests/data/MPSproto_test/EFM_test_reference_ngs.csv
diff --git a/lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_ngs.csv b/lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_ngs.csv
similarity index 100%
rename from lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_ngs.csv
rename to lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_ngs.csv
diff --git a/lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_sequence_info.csv b/lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_sequence_info.csv
similarity index 100%
rename from lusSTR/tests/data/MPSProto_test/test_filtering_EFMoutput_sequence_info.csv
rename to lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_sequence_info.csv
diff --git a/lusSTR/tests/test_filters.py b/lusSTR/tests/test_filters.py
index 4b547664..195f0e70 100644
--- a/lusSTR/tests/test_filters.py
+++ b/lusSTR/tests/test_filters.py
@@ -162,7 +162,7 @@ def test_plus1stutter(
     [
         ("RU_stutter_test/", "ce", "efm"),
         ("LUSPlus_stutter_test/", "lusplus", "efm"),
-        ("MPSProto_test/", "ngs", "mpsproto"),
+        ("MPSproto_test/", "ngs", "mpsproto"),
     ],
 )
 def test_EFMoutput_format(outputdir, datatype, software, tmp_path):
@@ -256,7 +256,7 @@ def test_flags(tmp_path):
     [
         ("RU_stutter_test/", "ce", "efm"),
         ("LUSPlus_stutter_test/", "lusplus", "efm"),
-        ("MPSProto_test/", "ngs", "mpsproto"),
+        ("MPSproto_test/", "ngs", "mpsproto"),
     ],
 )
 def test_efm_reference(outputdir, datatype, software, tmp_path):
@@ -404,7 +404,8 @@ def test_lusplus_sequence_info(tmp_path):
         "forward",
         "--str-type",
         "lusplus",
-        "--efm",
+        "--software",
+        "efm",
     ]
     lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist))
     shutil.copyfile(inputfile, os.path.join(str_path, "LUSPlus.csv"))

From 60ab0e5ab8fd1ff0ce2412a6b26b7511d8db88db Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 6 Dec 2023 10:45:24 -0500
Subject: [PATCH 6/6] fixed manifest and setup.py

---
 MANIFEST.in | 2 +-
 setup.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index 87da302c..acbf06e2 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -12,4 +12,4 @@ include lusSTR/tests/data/NGS_stutter_test/*
 include lusSTR/tests/data/kinsnps/*
 include lusSTR/tests/data/lusstr_output/*
 include lusSTR/tests/data/LUSPlus_stutter_test/*
-include lusSTR/tests/data/MPSProto_test/*
+include lusSTR/tests/data/MPSproto_test/*
diff --git a/setup.py b/setup.py
index 43c6f4da..58ea49d0 100755
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@
             "lusSTR/tests/data/kinsnps/*",
             "lusSTR/tests/data/lusstr_output/*",
             "lusSTR/tests/data/LUSPlus_stutter_test/*",
-            "lusSTR/tests/data/MPSProto_test/*",
+            "lusSTR/tests/data/MPSproto_test/*",
             "lusSTR/workflows/*",
             "lusSTR/wrappers/*",
         ]