diff --git a/MANIFEST.in b/MANIFEST.in index a21bdbf7..e6b021d5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,10 +1,12 @@ include versioneer.py include lusSTR/_version.py -include lusSTR/filters.json -include lusSTR/str_markers.json -include lusSTR/snp_data.json -include lusSTR/filters.json +include lusSTR/data/filters.json +include lusSTR/data/str_markers.json +include lusSTR/data/snp_data.json +include lusSTR/data/config.yaml include lusSTR/tests/data/* +include lusSTR/workflows/* +include lusSTR/wrappers/* include lusSTR/tests/data/STRait_Razor_test_output/* include lusSTR/tests/data/UAS_bulk_input/* include lusSTR/tests/data/snps/* diff --git a/Makefile b/Makefile index 62087006..1f244636 100755 --- a/Makefile +++ b/Makefile @@ -6,15 +6,15 @@ help: Makefile ## test: run the automated test suite and print coverage information test: - pytest --cov=lusSTR --doctest-modules lusSTR/annot.py lusSTR/tests/test_*.py + pytest -m "not snps" --cov=lusSTR --doctest-modules lusSTR/tests/test_*.py ## style: check code style style: - black --line-length=99 --check *.py lusSTR/*.py lusSTR/tests/test_*.py + black --line-length=99 --check *.py lusSTR/scripts/*.py lusSTR/wrappers/*.py lusSTR/tests/test_*.py ## format: auto-reformat code with Black format: - black --line-length=99 *.py lusSTR/*.py lusSTR/tests/test_*.py + black --line-length=99 *.py lusSTR/scripts/*.py lusSTR/wrappers/*.py lusSTR/tests/test_*.py ## devenv: configure a development environment devenv: diff --git a/README.md b/README.md index 08e4bcc2..6fe941c5 100755 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ # lusSTR -lusSTR is a tool written in Python to convert NGS sequence data of forensic STR loci to different sequence representations and allele designations for ease in downstream analyses. 
+lusSTR is a tool written in Python to convert NGS sequence data of forensic STR loci to different sequence representations (sequence bracketed form) and allele designations (CE allele, LUS/LUS+ alleles) for ease in downstream analyses. See the section ```Converting STR sequences to other sequence representations and allele designations``` below for more information. -This Python package has been written for use with either: (1) the 27 autosomal STR loci, 24 Y-chromosome STR loci and 7 X-chromosome STR loci from the Verogen ForenSeq panel, or (2) the 22 autosomal STR loci and 22 Y-chromosome loci from the Promega PowerSeq panel. The package accomodates either the Sample Details Report from the ForenSeq Universal Analysis Software (UAS) or STRait Razor output. If STRait Razor output is provided, sequences are filtered to the UAS sequence region for translation. +This Python package has been written for use with either: (1) the 27 autosomal STR loci, 24 Y-chromosome STR loci and 7 X-chromosome STR loci from the Verogen ForenSeq panel, or (2) the 22 autosomal STR loci and 22 Y-chromosome loci from the Promega PowerSeq panel. The package accommodates either the Sample Details Report from the ForenSeq Universal Analysis Software (UAS) or STRait Razor output. If STRait Razor output is provided, sequences are filtered to the UAS sequence region for conversion. -lusSTR can perform filtering and stutter identification using the RU allele for autosomal loci and create files for direct input into two probabilistic genotyping software packages, including EuroForMix (EFM) and STRmix. +lusSTR can perform filtering and stutter identification using the RU allele or the sequence bracketed form for autosomal loci and create files for direct input into two probabilistic genotyping software packages, EuroForMix (EFM) and STRmix. -lusSTR also processes SNP data from the Verogen ForenSeq panel. 
ForenSeq consists of 94 identity SNPs, 22 phenotype (hair/eye color) SNPs, 54 ancestry SNPs and 2 phenotype and ancestry SNPs. Identity SNP data is provided in the UAS Sample Details Report; phenotype and ancestry SNP data is provided in the UAS Phenotype Report. All SNP calls are also reported in the STRait Razor output. +lusSTR also processes SNP data from the Verogen ForenSeq panel. ForenSeq consists of 94 identity SNPs, 22 phenotype (hair/eye color) SNPs, 54 ancestry SNPs and 2 phenotype and ancestry SNPs. Identity SNP data is provided in the UAS Sample Details Report; phenotype and ancestry SNP data is provided in the UAS Phenotype Report. All SNP calls are also reported in the STRait Razor output. +**SNP processing is currently a work in progress.** ## Installation @@ -25,122 +26,124 @@ make devenv ## Usage -lusSTR accomodates three different input formats: -(1) UAS Sample Details Report and UAS Phenotype Report (for SNP processing) in .xlsx format -(2) STRait Razor output with one sample per file -(3) Sample(s) sequences in CSV format; first four columns must be Locus, NumReads, Sequence, SampleID; Optional last two columns can be Project and Analysis IDs. +lusSTR accommodates three different input formats: +(1) UAS Sample Details Report and UAS Phenotype Report (for SNP processing) in .xlsx format (a single file or directory containing multiple files) +(2) STRait Razor output with one sample per file (a single file or directory containing multiple files) +(3) Sample(s) sequences in CSV format; first four columns must be Locus, NumReads, Sequence, SampleID; Optional last two columns can be Project and Analysis IDs. 
-### Formatting input for STR loci sequences +*These individual sample files or directory of files must be specified in the config file (see below).* -If inputting data from either the UAS Sample Details Report or STRait Razor output, the user must first invoke the ```format``` command to extract necessary information and format for the ```annotate``` command. -The ```format``` command removes unnecessary rows/columns and outputs a table in CSV format containing the following columns: -* Locus -* Number of Reads observed with the specified sequence -* Sequence -* Sample ID -* Project ID (if provided) -* Analysis ID (if provided) +lusSTR utilizes the ```lusstr``` command to invoke various Snakemake workflows. The ```lusstr strs``` command invokes the STR analysis workflow. *The SNP workflow is currently under construction.* +___ +### Creating the config file -If including the sex chromosome loci (using the ```--include-sex``` flag), the ```format``` command will output a second table for the sex loci with the same columns. +Running ```lusstr config``` creates a config file containing the default settings for the lusSTR STR analysis pipeline. The settings can be changed with command line arguments (see below) or by manually editing the config file. The default settings, along with their descriptions, are as follows: +### general settings: +uas: ```True``` (True/False); whether the data was run through the UAS (invoke ```--straitrazor``` flag if STRait Razor was used) +sex: ```False``` (True/False); include sex-chromosome STRs (invoke ```--sex``` flag) +samp_input: ```/path/to/input/directory/or/samples``` input directory or sample; if not provided, will be current working directory (indicate using ```--input path/to/dir```) +output: ```lusstr_output``` output file/directory name (indicate using ```--out dir/sampleid e.g. 
--out test_030923```) -#### **UAS Sample Details Report** +### convert settings +kit: ```forenseq``` (forenseq/powerseq) (invoke the ```--powerseq``` flag if using PowerSeq data) +nocombine: ```False``` (True/False); do not combine identical sequences during the ```convert``` step, if using STRait Razor data. (invoke the ```--nocombine``` flag) -If using the UAS Sample Details Report, the user must specify the input file or folder as well an output file and the ```--uas``` flag: +### filter settings +output_type: ```strmix``` (strmix/efm) (invoke ```--efm``` flag if creating output for EuroForMix) +profile_type: ```evidence``` (evidence/reference) (invoke ```--reference``` flag if creating a reference output file) +data_type: ```ngs``` (ce/ngs) (invoke ```--ce``` if using CE allele data) +info: ```True``` (True/False); create allele information file (invoke ```--noinfo``` flag to not create the allele information file) +separate: ```False``` (True/False); for EFM only, if True will create individual files for samples; if False, will create one file with all samples (invoke ```--separate``` flag to separate EFM output files) +nofilters: ```False``` (True/False); skip all filtering steps but still create EFM/STRmix output files (invoke ```--nofilters``` flag) + +One additional argument can be provided with ```lusstr config```: +```-w```/```--workdir``` sets the working directory (e.g. ```-w lusstr_files/```) and all created files are stored in that directory. + +**Once the config file is created with all the desired settings, the STR workflow can be run. The config file must be located in the working directory.** +___ +## Running the lusSTR STR workflow + +The lusSTR STR workflow consists of three steps: +(1) ```format```: formatting input +(2) ```convert```: converting sequences to other sequence representations and allele designations +(3) ```filter```: performing several filtering steps and creating appropriately formatted files for use in EuroForMix or STRmix. 
+ +Any or all steps can be run. In order to run all three steps, the following command can be used: ``` -lusstr format -o --uas +lusstr strs all ``` -Example: +One additional argument can be provided, a working directory. +**This working directory must contain the config file.** +The default working directory is the current directory. ``` -lusstr format UAS_Sample_Details_Report.xlsx -o UAS_test_file.csv --uas +lusstr strs all -w lusstr_files/ ``` -Example using a folder of UAS Sample Details Reports: + +Individual steps can also be run ``` -lusstr format Run01/ -o Run01_compiled_file.csv --uas +lusstr strs format ``` -Including the sex chromosome loci using the ```--include-sex```: - ``` -lusstr format UAS_Sample_Details_Report.xlsx -o UAS_test_file.csv --uas --include-sex +lusstr strs convert -w lusstr_files/ ``` -The above command will output two tables which are used in the ```annotate``` command: ```UAS_test_file.csv``` and ```UAS_test_file_sexloci.csv```. +**In order to run the ```convert``` step, the appropriately formatted ```.csv``` file containing the sequences normally created in the ```format``` step must be present in the working directory. See the above ```Usage``` section for specific information about that file (required columns, etc.).** -#### **STRait Razor** +---- -If using lusSTR version 0.4 or above, STRait Razor data **must** be produced using the STRait Razor config file released in January 2021 (ForenSeqv1.25.config and PowerSeqv2.1.config). These config files are available here: https://github.com/Ahhgust/STRaitRazor/tree/103ef68746f010add8f21266fa8bf8fb9f4a076e/. +## Additional information about each step -If using the output from STRait Razor, the files **must** be labeled as ```SampleID.txt``` (example: ```Sample0001.txt```) and can either be specified as a single file or as a folder of multiple STRait Razor output files (folder labeled with the project ID). 
The user must specify the file or folder name for the ```format``` command as well as an output filename (all sample files will be compiled into one file): -``` -lusstr format -o -``` -Examples: +### Formatting input for STR loci sequences -``` -lusstr format STRaitRazorOutputFolder/ -o STRaitRazor_test_file.csv -``` -``` -lusstr format A001.txt -o A001.csv -``` +If inputting data from either the UAS Sample Details Report or STRait Razor output, the user must first invoke the ```format``` step to extract necessary information and format for the ```convert``` step. -Again, sex loci can be included using the ```--include-sex``` flag. -``` -lusstr format STRaitRazorOutputFolder/ -o STRaitRazor_test_file.csv --include-sex -``` -With this, two tables will be produced: ```STRaitRazor_test_file.csv``` and ```STRaitRazor_test_file_sex_loci.csv```. +The ```format``` command removes unnecessary rows/columns and outputs a table in CSV format containing the following columns: +* Locus +* Number of Reads observed with the specified sequence +* Sequence +* Sample ID +* Project ID (if provided) +* Analysis ID (if provided) + +If including the sex chromosome loci as specified in the config file, the ```format``` command will output a second table for the sex loci with the same columns (```*_sexloci.csv```). 
+--- -### Translation of STR loci sequences +### Converting STR sequences to other sequence representations and allele designations -The ```annotate``` command produces a tab-delineated table with the following columns: +The ```convert``` step produces a tab-delineated table with the following columns: * Sample ID * Project ID (if provided) * Analysis ID (if provided) * Locus * UAS Output sequence: can be forward or reverse strand * Forward strand sequence: will be same as UAS Output sequence for those loci reported on forward strand -* RU allele: common length-based repeat unit (RU) allele designation +* UAS Output Bracketed notation: Bracketed sequence form for the reported UAS sequence output (will be same for those loci which report the forward strand) * Forward Strand Bracketed notation: Bracketed notation for forward strand sequence -* UAS Output Bracketed notation: Bracketed annotation for the reported UAS sequence output (will be same for those loci which report the forward strand) +* CE allele: common length-based CE allele designation (also called the repeat unit, or RU, allele) * LUS: Longest uninterrupted stretch -* LUS+: Notation combining multiple allele designations including RU, LUS, secondary motif (if applicable) and tertiary motif (if applicable) +* LUS+: Notation combining multiple allele designations including CE, LUS, secondary motif (if applicable) and tertiary motif (if applicable) * Reads: number of reads observed with the specified sequence -If the ```--include-sex``` flag is included, a second table with the above columns for the sex chromosome loci will be outputted as well. +If including the sex chromosome loci as specified in the config file, a second table with the above columns for the sex chromosome loci will be outputted as well. 
-**NOTE** on including the sex chromosome STR loci: in the ```annotate``` step, lusSTR requires two files for input: (1) the properly formatted file of autosomal STR loci produced from the ```format``` command (or a file with the appropriate format) with a label such as ```lusSTRinput.csv```, and (2) the properly formatted file of X- and Y-STR loci produced from the ```format``` command with the ```--include-sex``` flag (or a file with the appropriate format) labeled as ```lusSTRinput_sexloci.csv```. The file containing the X- and Y-STR loci *must* have the identical file name to the file containing the autosomal STRs but with ```_sexloci.csv``` (see above for precise examples). These two files are automatically created (and named appropriately) when using the ```--include-sex``` flag with the ```format``` command. -For the ```annotate``` command, the following must be specified: -* Input filename -* Output filename -* Kit (forenseq or powerseq; default is forenseq) - -```--uas``` flag indicates the sequences are only of the UAS region; otherwise, lusSTR assumes full length sequences. -```--include-sex``` flag indicates to include the sex chromosome loci. - -``` -lusstr annotate -o --kit forenseq --uas --include-sex -``` -Example: -``` -lusstr annotate UAS_test_file.csv -o UAS_final_table.txt --kit forenseq --uas -``` - -If no ```--uas``` flag is provided, several additional processes occur with the ```annotate``` command: +If STRait Razor data is specified, several additional processes occur with the ```convert``` step: * The full sequences are filtered to the UAS region before the translation step. The number of bases to remove is determined based on the specified kit. -* Once the sequences are filtered to the UAS region, any duplicated sequences are removed and their reads are summed in with the remaining sequence ```Reads``` column. NOTE: This step can be skipped with the ```--nocombine``` flag. 
+* Once the sequences are filtered to the UAS region, any duplicated sequences are removed and their reads are summed in with the remaining sequence ```Reads``` column. NOTE: This step can be skipped with the ```nocombine``` setting in the config file. -Further, a second table (labeled as ```*_flanks_anno.txt```) containing information related to the flanking sequences surrounding the UAS sequence region is also produced with the following columns: +Further, a second table (labeled as ```*_flanks.txt```) containing information related to the flanking sequences surrounding the UAS sequence region is also produced with the following columns: * Sample ID * Project ID * Analysis ID (same as Project ID) * Locus * Reads: number of reads observed for the specified sequence -* Length-based Allele +* Length-based (CE) Allele * Full Sequence * 5' Flanking Sequence Bracketed Notation * UAS Region Sequence Bracketed Notation (same as column ```UAS Output Bracketed Notation``` in the main table) @@ -149,106 +152,30 @@ Further, a second table (labeled as ```*_flanks_anno.txt```) containing informat The ```Potential Issues``` column in this report is to draw attention to potential problem sequences (due to perhaps an indel or partial sequence) and requires the attention of the user to further evaluate the sequence for it's authenticity. -Example: -``` -lusstr annotate STRaitRazor_test_file.csv -o STRaitRazor_powerseq_final.txt --kit powerseq -``` -The above example would produce two files: (1) ```STRaitRazor_powerseq_final.txt``` and (2) ```STRaitRazor_powerseq_final_flanks_anno.txt```. - -If the ```--include-sex``` flag is included, as below: -``` -lusstr annotate STRaitRazor_test_file.csv -o STRaitRazor_powerseq_final.txt --kit powerseq --include-sex -``` - Two additional tables will be produced: (1) ```STRaitRazor_powerseq_final_sexloci.txt``` and (2) ```STRaitRazor_powerseq_final_sexloci_flanks_anno.txt``` for translation of the sex chromosome loci and their flanking regions. 
+--- - ## SNP Data Processing +### Filtering Alleles/Sequences and Creation of Files for Use in ProbGen Software - The ```snp``` command produces tab-delineated table with the following columns: - * Sample ID - * Project ID - * Analysis ID (same as Project ID) - * SNP (rsID) - * Reads: number of reads observed for the specified allele - * Foward Strand Allele: allele call on the forward strand - * UAS Allele: allele call as reported from the UAS - * Type: SNP type (identity/phenotype/ancestry) - * Issues: Indicates if called allele is one of two expected alleles for SNP - -If STRait Razor data is used as input, the number of reads for identical alleles within a SNP are combined in the above table. Further, if STRait Razor data is used as input, a second table (```*_full_output.txt```) is produced containing information for each sequence (not combined) with the following columns: - * Sample ID - * Project ID - * Analysis ID - * SNP - * Sequence: sequence containing the SNP of interest - * Reads - * Forward Strand Allele - * UAS Allele - * Type - * Potential issues: flags sequences which may contains errors, such as an unexpected allele call or short than expected sequence length. - - ### Usage - - ``` - lusstr snps -o --type --uas - ``` - -The ```snp``` command requires a folder of either UAS Reports (Sample Details Report(s) and/or Phenotype Report(s)) or STRait Razor output file(s). -The ```-o``` flag specifies the name of the output file (should end in ```.txt```) -The ```--type``` flag specifies the type of SNPs to include in the output file(s). The options are: ```all``` (all SNPs), ```i``` (identity SNPs only), or ```p``` (ancestry and phenotype SNPs only). The default is ```i```. -Similar to the processing of STR loci sequences, the ```--uas``` flag indicates the input files are Reports from the UAS. Absence of this flag indicates the provided files are STRait Razor output files. 
- -**Examples**: -``` -lusstr snps UAS_files/ -o uas_output_all.txt --type all --uas -``` -``` -lusstr snps STRait_Razor_output/ -o strait_razor_p.txt --type p -``` - -## Filtering RU alleles and Creation of Files for Use in ProbGen Software - - -The ```filter``` command provides the opportunity to filter sequences using thresholds such as: +The ```filter``` step provides the opportunity to filter sequences using thresholds such as: * Detection threshold (both static and dynamic) * Analytical threshold (both static and dynamic) * Same size threshold (dynamic) Custom static and dynamic thresholds for each locus are stored in the ```filters.json``` file. This file should be updated to utilize validated thresholds for individual labs. -In addition, stutter alleles can be identified using the ```--info``` flag. This creates a separate file containing information about each allele, including an allele classification (```real allele```, ```stutter``` or ```noise```). Stutter alleles are classified as either ```-1 stutter```, ```-2 stutter```, or ```+1 stutter```. For these stutter alleles, the stuttering allele is reported along with the percent stutter (# of reads for that allele/# of reads for stuttering allele). In instances where a stutter allele could be multiple different types of stutter, all potential designations will be reported as such: ```-1 stutter/-2 stutter```, ```-1 stutter/+1 stutter```, or ```-2 stutter/+1 stutter```. No percent stutter is calculated for these alleles. If a sequence is identified as noise, the percent noise is calculated (# of reads for that sequence/total locus reads). +In addition, stutter alleles can be identified using the ```info``` setting in the config file. This creates a separate file containing information about each allele, including an allele classification (```real allele```, ```stutter``` or ```BelowAT```). Stutter alleles are classified as either ```-1 stutter```, ```-2 stutter```, or ```+1 stutter```. 
For these stutter alleles, the stuttering allele is reported along with the percent stutter (# of reads for that allele/# of reads for stuttering allele). In instances where a stutter allele could be multiple different types of stutter, all potential designations will be reported as such: ```-1 stutter/-2 stutter```, ```-1 stutter/+1 stutter```, or ```-2 stutter/+1 stutter```. No percent stutter is calculated for these alleles. If a sequence is identified as noise, the percent noise is calculated (# of reads for that sequence/total locus reads). Each locus is checked for containing greater than 2 alleles (indicating a potential mixture) and for intralocus imbalance. If either are identified, a separate file (```Flagged_Loci.csv```) is created, containing the SampleID, Locus and either ```>2Alleles``` or ```IntraLocusImbalance```. -When using STRmix data, the data type can be specified using the ```--data-type``` flag as either ```ce``` or ```ngs``` (default is ```ce```). If ```ngs``` is specified, the same size filter is applied following the stutter filter. Further, the columns and column names in the output file differ based on the data type. +When using STRmix data, the data type can be specified using the ```data_type``` setting as either ```ce``` or ```ngs``` (default is ```ngs```). If ```ngs``` is specified, the same size filter is applied following the stutter filter. Further, the columns and column names in the output file differ based on the data type. -Finally, output files are created for direct use in EuroForMix (EFM) or STRmix. If EFM is specified, a single file is created containing all samples in the input file (however, separate output files for each sample can be created with the ```--separate``` flag). If STRmix is specified, a directory containing files for each individual sample is created. The ```--profile-type``` flag allows for the creation of either a ```reference``` or ```evidence``` profile. 
Both EuroForMix and STRmix require different formatting depending on the type of sample. +Finally, output files are created for direct use in EuroForMix (EFM) or STRmix. If EFM is specified, a single file is created containing all samples in the input file (however, separate output files for each sample can be created with the ```separate``` setting specified in the config file). If STRmix is specified, a directory containing files for each individual sample is created. The ```profile_type``` setting allows for the creation of either a ```reference``` or ```evidence``` profile. Both EuroForMix and STRmix require different formatting depending on the type of sample. -### Usage -``` -lusstr filter -o --output-type --profile-type --info --no-filters --separate -``` -The ```filter``` command requires the input of a ```.txt``` file produced by the ```lusstr annotate``` command. -The ```-o/--out``` flag specifies the name of the output file (for EFM) or output directory (for STRmix) -```--output-type``` specifies the type of output file created, either ```efm``` or ```strmix```. ```efm``` is the default. -```--profile-type``` specifies the sample type, either ```evidence``` or ```reference```. ```evidence``` is the default. -```--data-type``` specifies the type of data used, either ```ce``` or ```ngs```. ```ce``` is the default. Only applicable to STRmix data. -```--info``` creates the allele information file, containing allele designations (e.g. stutter, noise or real allele) as well as stutter/noise percentages. -The ```--no-filters``` flag will not apply any filters and therefore all alleles present in the input file will be in the created output file(s). -The ```--separate``` flag will indicate to separate samples into individual output files for EFM. STRmix creates separate files by default. 
+___ -**Examples**: - -``` -lusstr filter experiment01.txt -o experiment01_efm.csv --output-type efm --info -``` - -``` -lusstr filter experiment01.txt -o STRmix_files/ --output-type strmix --profile-type reference --info -``` + ## SNP Data Processing -``` -lusstr filter experiment01.txt -o STRmix_files/ --output-type strmix --data-type ngs --info -``` +Currently under construction ---- diff --git a/lusSTR/__init__.py b/lusSTR/__init__.py index c7136cd0..f0d54adf 100644 --- a/lusSTR/__init__.py +++ b/lusSTR/__init__.py @@ -10,14 +10,17 @@ # Development Center. # ------------------------------------------------------------------------------------------------- -from lusSTR import annot -from lusSTR import marker -from lusSTR import repeat -from lusSTR import format -from lusSTR import snps -from lusSTR import filter +from pkg_resources import resource_filename from lusSTR import cli -from ._version import get_versions +from lusSTR._version import get_versions __version__ = get_versions()["version"] del get_versions + + +def snakefile(workflow="strs"): + return resource_filename("lusSTR", f"workflows/{workflow}.smk") + + +def wrapper(label): + return resource_filename("lusSTR", f"wrappers/{label}.py") diff --git a/lusSTR/cli.py b/lusSTR/cli.py deleted file mode 100644 index fe834ea2..00000000 --- a/lusSTR/cli.py +++ /dev/null @@ -1,211 +0,0 @@ -# ------------------------------------------------------------------------------------------------- -# Copyright (c) 2020, DHS. -# -# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under -# the BSD license: see LICENSE.txt. -# -# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National -# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the -# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and -# Development Center. 
-# ------------------------------------------------------------------------------------------------- - -import argparse -import lusSTR -from . import format, annot, snps, filter - - -def format_subparser(subparsers): - cli = subparsers.add_parser("format") - cli.add_argument( - "-o", - "--out", - metavar="FILE", - help="File to which output will be written; default is terminal (stdout)", - ) - cli.add_argument( - "input", - help="Input is either a UAS Sample Details Report (in .xlsx format) or a STRait Razor " - "output file (.txt format). Both single files and directories containing multiple UAS or " - "STRait Razor files are accepted. If input is the UAS Sample Details Report, use of the " - "--uas flag is required. If a directory of STRait Razor files is provided, the name of " - "the directory will be used as the Project and Analysis IDs in the final annotation " - "table. If a single file is provided, the Project and Analysis IDs will be NA. " - "STRaitRazor files should be named as the Sample ID, e.g. A001.txt, A002.txt, etc.", - ) - cli.add_argument( - "--uas", - action="store_true", - help="Use if sequences have been previously run through the ForenSeq UAS.", - ) - cli.add_argument( - "--include-sex", - dest="sex", - action="store_true", - help="Use if including the X and Y STR markers", - ) - - -def annot_subparser(subparsers): - cli = subparsers.add_parser("annotate") - cli.add_argument( - "-o", - "--out", - metavar="FILE", - help="file to which output will be written; default is terminal (stdout). 
If the " - "--separate flag is used, this will be the name of the directory which the individual " - "files are written to.", - ) - cli.add_argument( - "input", - help="sample(s) in CSV format; first four columns must be Locus, NumReads, " - "Sequence, SampleID; Optional last two columns can be Project and Analysis.", - ) - cli.add_argument( - "--kit", - choices=["forenseq", "powerseq"], - default="forenseq", - help="Kit used to develop sequences; only forenseq or powerseq accepted;" - "default = forenseq", - ) - cli.add_argument( - "--uas", - action="store_true", - help="Use if sequences have been run through the ForenSeq UAS.", - ) - cli.add_argument( - "--nocombine", - dest="combine", - action="store_false", - help="Do not combine read counts for duplicate sequences within the UAS region. " - "By default, read counts are combined for sequences not run through the UAS.", - ) - cli.add_argument( - "--include-sex", - dest="sex", - action="store_true", - help="Use if including the X and Y STR markers. Separate reports for these markers " - "will be created.", - ) - cli.add_argument( - "--separate", - action="store_true", - help="This flag will result in the creation of individual output files per sample.", - ) - - -def snps_subparser(subparsers): - cli = subparsers.add_parser("snps") - cli.add_argument( - "-o", - "--out", - metavar="FILE", - help="file to which output will be written; default is terminal (stdout)", - ) - cli.add_argument( - "input", - help="Input is either a directory of either UAS output files (Sample Details Report and " - "Phenotype Report) or of STRait Razor output files. If input is the UAS output file(s) " - "(in .xlsx format), use of the --uas flag is required. If STRait Razor output is " - "used, the name of the provided directory will be used as the Analysis ID in the " - "final annotation table.", - ) - cli.add_argument( - "--type", - choices=["all", "p", "i"], - default="i", - help='Specify the type of SNPs to include in the final report. 
"p" will include only the ' - 'Phenotype and Ancestry SNPs; "i" will include only the Identity SNPs; and "all" will ' - "include all SNPs. Default is Identity SNPs only (i).", - ) - cli.add_argument( - "--uas", - action="store_true", - help="Use if sequences have been run through the ForenSeq UAS.", - ) - cli.add_argument( - "--separate", - action="store_true", - help="This flag will result in the creation of individual output files per sample.", - ) - - -def filter_subparser(subparsers): - cli = subparsers.add_parser("filter") - cli.add_argument("input", help="Input is a single lusSTR output file (.txt format)") - cli.add_argument( - "--separate", - action="store_true", - help="Used to create separate final output files for each Sample. If not used, a single " - "file containing all samples will be created.", - ) - cli.add_argument( - "--info", - action="store_true", - help="Use to create a text document containing additional information on filtered " - "sequences and stutter.", - ) - cli.add_argument( - "--output-type", - dest="output", - choices=["efm", "strmix"], - default="efm", - help='Choose the file format of the output file, either "efm" or "strmix". ' - "Default is efm.", - ) - cli.add_argument( - "--no-filters", - dest="nofilters", - action="store_true", - help="Used to skip all filtering steps. All input alleles will be included in the output.", - ) - cli.add_argument( - "--out", - "-o", - metavar="FILE", - help="Name of output file containing all samples for EFM or name/path of directory for " - "STRmix. If separate files are specified for EFM, the sample ID will be used as the " - "filename. Output files are in CSV format.", - ) - cli.add_argument( - "--profile-type", - dest="profile", - choices=["evidence", "reference"], - default="evidence", - help="Choose the type of profile, either evidence or reference. 
Default is evidence.", - ) - cli.add_argument( - "--data-type", - dest="data", - choices=["ngs", "ce"], - default="ce", - help="Choose the type of data, either ngs or ce. Default is ce." - "**This is only applicable to STRmix evidence data.**", - ) - - -mains = { - "format": lusSTR.format.main, - "annotate": lusSTR.annot.main, - "snps": lusSTR.snps.main, - "filter": lusSTR.filter.main, -} - -subparser_funcs = { - "format": format_subparser, - "annotate": annot_subparser, - "snps": snps_subparser, - "filter": filter_subparser, -} - - -def get_parser(): - parser = argparse.ArgumentParser() - parser.add_argument( - "-v", "--version", action="version", version="lusSTR v" + lusSTR.__version__ - ) - subcommandstr = ", ".join(sorted(subparser_funcs.keys())) - subparsers = parser.add_subparsers(dest="subcmd", metavar="subcmd", help=subcommandstr) - for func in subparser_funcs.values(): - func(subparsers) - return parser diff --git a/lusSTR/cli/__init__.py b/lusSTR/cli/__init__.py new file mode 100644 index 00000000..e6fae75d --- /dev/null +++ b/lusSTR/cli/__init__.py @@ -0,0 +1,41 @@ +import argparse +import lusSTR +from lusSTR.cli import config +from lusSTR.cli import strs +from lusSTR.cli import snps +import snakemake + + +mains = { + "config": config.main, + "strs": strs.main, + "snps": snps.main +} + +subparser_funcs = { + "config": config.subparser, + "strs": strs.subparser, + "snps": snps.subparser +} + + +def main(args=None): + if args is None: + args = get_parser().parse_args() + if args.subcmd is None: + get_parser().parse_args(["-h"]) + mainmethod = mains[args.subcmd] + result = mainmethod(args) + return result + + +def get_parser(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-v", "--version", action="version", version="lusSTR v" + lusSTR.__version__ + ) + subcommandstr = ", ".join(sorted(subparser_funcs.keys())) + subparsers = parser.add_subparsers(dest="subcmd", metavar="subcmd", help=subcommandstr) + for func in subparser_funcs.values(): + 
func(subparsers) + return parser diff --git a/lusSTR/cli/config.py b/lusSTR/cli/config.py new file mode 100644 index 00000000..0ad64916 --- /dev/null +++ b/lusSTR/cli/config.py @@ -0,0 +1,104 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2020, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +import argparse +import lusSTR +import os +from pathlib import Path +from pkg_resources import resource_filename +import yaml + + +def main(args): + Path(args.workdir).mkdir(parents=True, exist_ok=True) + final_dest = f"{args.workdir}/config.yaml" + config = resource_filename("lusSTR", "data/config.yaml") + final_config = edit_config(config, args) + with open(final_dest, "w") as file: + yaml.dump(final_config, file) + +def edit_config(config, args): + with open(config, "r") as file: + data = yaml.safe_load(file) + if args.straitrazor: + data["uas"] = False + if args.powerseq: + data["kit"] = "powerseq" + if args.input: + data["samp_input"] = args.input + else: + data["samp_input"] = os.getcwd() + if args.out: + data["output"] = args.out + if args.sex: + data["sex"] = True + if args.separate: + data["separate"] = True + if args.nocombine: + data["nocombine"] = True + if args.nofiltering: + data["nofilters"] = True + if args.noinfo: + data["info"] = False + if args.reference: + data["profile_type"] = "reference" + if args.ce: + data["data_type"] = "ce" + if args.efm: + data["output_type"] = "efm" 
+ return data + + +def subparser(subparsers): + p = subparsers.add_parser("config", description="Create config file for running STR pipeline") + p.add_argument( + "-w", "--workdir", metavar="W", default=".", + help="directory to add config file; default is current working directory") + p.add_argument( + "--straitrazor", action="store_true", + help="Use if sequences have been previously run through STRait Razor." + ) + p.add_argument("--input", help="Input file or directory") + p.add_argument("--out", "-o", help="Output file/directory name") + p.add_argument( + "--powerseq", action="store_true", + help="Use to indicate sequences were created using the PowerSeq Kit." + ) + p.add_argument( + "--sex", action="store_true", + help="Use if including the X and Y STR markers. Separate reports for these markers " + "will be created.", + ) + p.add_argument( + "--nocombine", action="store_true", + help="Do not combine read counts for duplicate sequences within the UAS region " + "during the 'convert' step. By default, read counts are combined for sequences " + "not run through the UAS.", + ) + p.add_argument( + "--reference", action="store_true", + help="Use for creating Reference profiles" + ) + p.add_argument("--efm", action="store_true",help="Use to create EuroForMix profiles") + p.add_argument("--ce", action="store_true", help="Use for CE data") + p.add_argument( + "--noinfo", action="store_true", + help="Use to not create the Sequence Information File in the 'filter' step" + ) + p.add_argument( + "--separate", action="store_true", + help="Use to separate EFM profiles in the 'filter' step." 
+ ) + p.add_argument( + "--nofiltering", action="store_true", + help="Use to perform no filtering during the 'filter' step" + ) diff --git a/lusSTR/cli/snps.py b/lusSTR/cli/snps.py new file mode 100644 index 00000000..90615845 --- /dev/null +++ b/lusSTR/cli/snps.py @@ -0,0 +1,25 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2020, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +import lusSTR +import snakemake + +## placeholder until I update this + +def main(args): + raise NotImplementedError('SNP workflow implementation pending') + +def subparser(subparsers): + p = subparsers.add_parser("snps", description="Running the entire STR pipeline (format, annotate and filter)") + p.add_argument("--config", default="config.yaml", help="config file used to identify settings.") + p.add_argument("-w", "--workdir", metavar="W", default=".", help="working directory") + p.add_argument("--skip-filter", dest="filter", action = "store_true", help="Skip filtering step") diff --git a/lusSTR/cli/strs.py b/lusSTR/cli/strs.py new file mode 100644 index 00000000..d1f1510e --- /dev/null +++ b/lusSTR/cli/strs.py @@ -0,0 +1,37 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2020, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. 
+# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +import argparse +import lusSTR +from snakemake import snakemake + + +def main(args): + pretarget = args.target if args.target != "all" else "filter" + workdir = args.workdir + result = snakemake( + lusSTR.snakefile(workflow="strs"), targets=[pretarget], workdir=workdir + ) + if result is not True: + raise SystemError('Snakemake failed') + +def subparser(subparsers): + p = subparsers.add_parser( + "strs", description="Running the STR pipeline" + ) + p.add_argument( + "target", choices=["format", "convert", "all"], + help="Steps to run. Specifying 'format' will run only 'format'. Specifying " + "'convert' will run both 'format' and 'convert'. Specifying 'all' will run " + "all steps of the STR workflow ('format', 'convert' and 'filter')." + ) + p.add_argument("-w", "--workdir", metavar="W", default=".", help="working directory") diff --git a/lusSTR/__main__.py b/lusSTR/data/__init__.py similarity index 67% rename from lusSTR/__main__.py rename to lusSTR/data/__init__.py index 6d4181ae..78e86fc5 100644 --- a/lusSTR/__main__.py +++ b/lusSTR/data/__init__.py @@ -9,16 +9,3 @@ # National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and # Development Center. 
# ------------------------------------------------------------------------------------------------- - -import argparse -import lusSTR -import sys - - -def main(args=None): # pragma: no cover - if args is None: # pragma: no cover - if len(sys.argv) == 1: - lusSTR.cli.get_parser().parse_args(["-h"]) - args = lusSTR.cli.get_parser().parse_args() - mainmethod = lusSTR.cli.mains[args.subcmd] - mainmethod(args) diff --git a/lusSTR/data/config.yaml b/lusSTR/data/config.yaml new file mode 100644 index 00000000..1c905695 --- /dev/null +++ b/lusSTR/data/config.yaml @@ -0,0 +1,20 @@ +%YAML 1.2 +--- + +## general settings +uas: True ## True/False; if ran through UAS +sex: False ## True/False; include sex-chromosome STRs +samp_input: "/path/to/input/directory/or/samples" ## input directory or sample; if not provided, will be cwd +output: "lusstr_output" ## output file/directory name; Example: "test_030923" + +##convert settings +kit: "forenseq" ## forenseq/powerseq +nocombine: False ## True/False; do not combine identical sequences (if using STRait Razor data) + +##filter settings +output_type: "strmix" ## strmix/efm +profile_type: "evidence" ## evidence/reference +data_type: "ngs" ## ce/ngs +info: True ## True/False; create allele information file +separate: False ##True/False; for EFM only, if True will create individual files for samples; if False, will create one file with all samples +nofilters: False ##True/False; skip all filtering steps but still creates EFM/STRmix output files \ No newline at end of file diff --git a/lusSTR/filters.json b/lusSTR/data/filters.json similarity index 100% rename from lusSTR/filters.json rename to lusSTR/data/filters.json diff --git a/lusSTR/snp_data.json b/lusSTR/data/snp_data.json similarity index 100% rename from lusSTR/snp_data.json rename to lusSTR/data/snp_data.json diff --git a/lusSTR/str_markers.json b/lusSTR/data/str_markers.json similarity index 100% rename from lusSTR/str_markers.json rename to lusSTR/data/str_markers.json diff 
--git a/lusSTR/scripts/__init__.py b/lusSTR/scripts/__init__.py new file mode 100644 index 00000000..78e86fc5 --- /dev/null +++ b/lusSTR/scripts/__init__.py @@ -0,0 +1,11 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2020, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- diff --git a/lusSTR/filter_settings.py b/lusSTR/scripts/filter_settings.py similarity index 99% rename from lusSTR/filter_settings.py rename to lusSTR/scripts/filter_settings.py index fd06eca6..88f51ccd 100644 --- a/lusSTR/filter_settings.py +++ b/lusSTR/scripts/filter_settings.py @@ -19,7 +19,7 @@ def get_filter_metadata_file(): - return resource_filename("lusSTR", "filters.json") + return resource_filename("lusSTR", "data/filters.json") with open(get_filter_metadata_file(), "r") as fh: diff --git a/lusSTR/marker.py b/lusSTR/scripts/marker.py similarity index 95% rename from lusSTR/marker.py rename to lusSTR/scripts/marker.py index f19745ee..1a9f06d0 100644 --- a/lusSTR/marker.py +++ b/lusSTR/scripts/marker.py @@ -12,14 +12,18 @@ import json import lusSTR -from lusSTR.annot import get_str_metadata_file, split_sequence_into_two_strings -from lusSTR.repeat import collapse_repeats_by_length, collapse_repeats_by_length_flanks -from lusSTR.repeat import sequence_to_bracketed_form -from lusSTR.repeat import reverse_complement, reverse_complement_bracketed -from lusSTR.repeat import repeat_copy_number, collapse_all_repeats, 
split_by_n +from lusSTR.scripts.repeat import collapse_repeats_by_length, collapse_repeats_by_length_flanks +from lusSTR.scripts.repeat import sequence_to_bracketed_form, split_sequence_into_two_strings +from lusSTR.scripts.repeat import reverse_complement, reverse_complement_bracketed +from lusSTR.scripts.repeat import repeat_copy_number, collapse_all_repeats, split_by_n +from pkg_resources import resource_filename import re +def get_str_metadata_file(): + return resource_filename("lusSTR", "data/str_markers.json") + + with open(get_str_metadata_file(), "r") as fh: str_marker_data = json.load(fh) @@ -258,7 +262,7 @@ def do_split(self): return not self.cannot_split or self.split_compatible @property - def annotation(self): + def convert(self): bylength = ( self.split_compatible or (self.data["ReverseCompNeeded"] == "Yes" and self.split_compatible) @@ -274,19 +278,19 @@ def annotation(self): return collapseseq @property - def annotation_uas(self): + def convert_uas(self): if self.data["ReverseCompNeeded"] == "Yes": - return reverse_complement_bracketed(self.annotation) - return self.annotation + return reverse_complement_bracketed(self.convert) + return self.convert @property def designation(self): lus, sec, ter = None, None, None - lus = repeat_copy_number(self.annotation, self.data["LUS"]) + lus = repeat_copy_number(self.convert, self.data["LUS"]) if self.data["Sec"] != "": - sec = repeat_copy_number(self.annotation, self.data["Sec"]) + sec = repeat_copy_number(self.convert, self.data["Sec"]) if self.data["Tert"] != "": - ter = repeat_copy_number(self.annotation, self.data["Tert"]) + ter = repeat_copy_number(self.convert, self.data["Tert"]) return lus, sec, ter @property @@ -304,8 +308,8 @@ def summary(self): return [ self.uas_sequence, self.forward_sequence, - self.annotation_uas, - self.annotation, + self.convert_uas, + self.convert, canon, lus_final_output, lus_plus, @@ -367,7 +371,7 @@ def flank_5p(self): return flank @property - def annotation(self): + def 
convert(self): if len(self.uas_sequence) < 110: bracketed_form = collapse_repeats_by_length(self.uas_sequence, 4) else: @@ -453,7 +457,7 @@ def flank_5p(self): return flank @property - def annotation(self): + def convert(self): """ Function to correctly bracket microvariants in the D7S820 locus. @@ -488,9 +492,9 @@ def annotation(self): @property def designation(self): lus, sec, ter = None, None, None - lus = repeat_copy_number(self.annotation, self.data["LUS"]) - sec = repeat_copy_number(self.annotation, self.data["Sec"]) - if str(self.annotation)[-1] == "T" and isinstance(self.canonical, str): + lus = repeat_copy_number(self.convert, self.data["LUS"]) + sec = repeat_copy_number(self.convert, self.data["Sec"]) + if str(self.convert)[-1] == "T" and isinstance(self.canonical, str): ter = 1 else: ter = 0 @@ -558,11 +562,11 @@ def flank_3p(self): return flank @property - def annotation(self): - """Bracketed annotation for D1S1656 + def convert(self): + """Bracketed sequence form for D1S1656 This function identifies if the sequence is a microvariant in order to call different - functions to create the bracketed annotation. + functions to create the bracketed form. """ sequence = self.forward_sequence sequence_filt = sequence[2:] @@ -618,8 +622,8 @@ def designation(self): return lus, sec, ter @property - def annotation(self): - """Bracketed annotation for PentaD + def convert(self): + """Bracketed sequence form for PentaD If the sequence is >= 18bp, the flanking region (first 5 bases) is first split off in the sequence to preserve that sequence. Then the repeat units are identified and bracketed. @@ -738,8 +742,8 @@ def flank_3p(self): return "" @property - def annotation(self): - """Bracketed annotation for FGA + def convert(self): + """Bracketed sequence form for FGA Specialized handling is required because which repeat unit should be identified differs based on its location in the sequence. 
For example, the 'GGAA' repeat should be identified @@ -747,7 +751,7 @@ def annotation(self): sequence; and the repeat 'AAAG' should be identified within the two end repeats. Simply identifying repeat units in a specified order does not result in the final - annotation which is consistent with previously published annotation for this locus. + form which is consistent with the previously published sequence form for this locus. """ sequence = self.forward_sequence if len(sequence) % self.repeat_size == 0 or (not ("GGAA") in sequence): @@ -849,7 +853,7 @@ def flank_3p(self): return flank @property - def annotation(self): + def convert(self): if isinstance(self.canonical, str): return sequence_to_bracketed_form(self.uas_sequence, self.repeat_size, self.repeats) elif isinstance(self.canonical, int): @@ -880,13 +884,13 @@ def flank_3p(self): return flank @property - def annotation(self): - """Bracketed annotation for D21 + def convert(self): + """Bracketed sequence form for D21 A specialized function is required for this locus due to the potential end of the sequence containing 'TA TCTA' and other variants. This sequence needs to remain intact to conform - with the conventional annotation for this particular locus. However, if the 'TATCTA' is - included in a repeat unit, the repeat unit needs to be reported (i.e. [TCTA]2). + with the conventional bracketed form for this particular locus. However, if the 'TATCTA' + is included in a repeat unit, the repeat unit needs to be reported (i.e. [TCTA]2). """ forward_strand_brack_form = sequence_to_bracketed_form( self.uas_sequence, self.data["NumBasesToSeparate"], self.repeats @@ -930,7 +934,7 @@ def designation(self): Special handling is required because the LUS repeat motif is the last 'TCTA' repeat set and the secondary repeat motif is the first set of 'TCTA' repeats in the sequence. 
""" - sequence = self.annotation + sequence = self.convert repeat = self.data["LUS"] remaining = list() lus_sec = list() @@ -962,8 +966,8 @@ def designation(self): lus_allele = 0 sec_allele = lus_sec[0] finalcount = 0 - for m in re.finditer(self.data["Tert"], self.annotation): - count = self.annotation[m.end() + 1 : m.end() + 3] + for m in re.finditer(self.data["Tert"], self.convert): + count = self.convert[m.end() + 1 : m.end() + 3] if count == "" or count[0] == "[" or count[0] == " " or count.isalpha(): count = 1 try: @@ -981,8 +985,8 @@ def designation(self): class STRMarker_TH01(STRMarker): @property - def annotation(self): - """Bracketed annotation for TH01 + def convert(self): + """Bracketed sequence form for TH01 Special handling is required for the microvariants of the TH01 locus because of the insertion of the 'ATG' between the repeat units 'AATG'. @@ -1047,15 +1051,16 @@ def flank_3p(self): class STRMarker_D19S433(STRMarker): @property - def annotation(self): - """Bracketed annotation for D19S433 + def convert(self): + """Bracketed sequence form for D19S433 A specialized function is required for this locus. The sequence is first broken into two different strings. The two sets of sequences are processed separately in order to identify the potential presence of a deletion in either sequence. Simply identifying repeat units in a specified order does not result in the final - annotation which is consistent with previously published annotation for this locus. + bracketed form which is consistent with the previously published bracketed form for this + locus. """ sequence = self.forward_sequence final = list() @@ -1129,7 +1134,7 @@ def designation(self): repeats. 
""" lus, sec, ter = None, None, None - anno = self.annotation + anno = self.convert repeat = "TCT" match_list = [] for block in anno.split(" "): @@ -1200,7 +1205,7 @@ def flank_3p(self): class STRMarker_DYS522(STRMarker): @property - def annotation(self): + def convert(self): sequence = self.forward_sequence final_seq = f"{sequence[:3]} {collapse_repeats_by_length(sequence[3:], 4)}" return final_seq @@ -1226,7 +1231,7 @@ def canonical(self): return canon_allele @property - def annotation(self): + def convert(self): sequence = self.forward_sequence if self.kit == "powerseq" or (len(sequence) % 4 != 0): final_seq = sequence_to_bracketed_form(sequence, self.repeat_size, self.repeats) @@ -1290,7 +1295,7 @@ def canonical(self): return canon_allele @property - def annotation(self): + def convert(self): sequence = self.forward_sequence if self.kit == "powerseq": final_seq = ( @@ -1320,7 +1325,7 @@ def flank_3p(self): class STRMarker_DYS458(STRMarker): @property - def annotation(self): + def convert(self): sequence = self.forward_sequence final_string = ( f"{collapse_repeats_by_length(sequence[:14], 4)} " @@ -1372,7 +1377,7 @@ def flank_3p(self): class STRMarker_DXS10135(STRMarker): @property - def annotation(self): + def convert(self): sequence = self.forward_sequence final_string = ( f"{collapse_repeats_by_length(sequence[:12], 4)} " @@ -1403,7 +1408,7 @@ def flank_3p(self): class STRMarker_Y_GATA_H4(STRMarker): @property - def annotation(self): + def convert(self): sequence = self.forward_sequence if self.kit == "powerseq": final_string = collapse_repeats_by_length(sequence, self.repeat_size) @@ -1454,12 +1459,12 @@ def canonical(self): @property def designation(self): lus, sec, ter = None, None, None - lus = repeat_copy_number(self.annotation, self.data["LUS"]) - sec = repeat_copy_number(self.annotation, self.data["Sec"]) + lus = repeat_copy_number(self.convert, self.data["LUS"]) + sec = repeat_copy_number(self.convert, self.data["Sec"]) if self.uas or self.kit == 
"powerseq": - ter = repeat_copy_number(self.annotation, self.data["Tert"]) + ter = repeat_copy_number(self.convert, self.data["Tert"]) else: - if self.annotation[-1] == "G": + if self.convert[-1] == "G": ter = "1" else: ter = "0" @@ -1496,7 +1501,7 @@ class STRMarker_DYS448(STRMarker): @property def designation(self): lus, sec, ter = None, None, None - anno = self.annotation + anno = self.convert repeat = "AGAGAT" match_list = [] for block in anno.split(" "): @@ -1535,7 +1540,7 @@ def designation(self): is identified as the "TAGA" repeat sequence with the largest number of repeats. """ lus, sec, ter = None, None, None - anno = self.annotation + anno = self.convert repeat = "TAGA" match_list = [] for block in anno.split(" "): @@ -1570,7 +1575,7 @@ def designation(self): is identified as the "TAGA" repeat sequence with the largest number of repeats. """ lus, sec, ter = None, None, None - anno = self.annotation + anno = self.convert repeat = "TAGA" match_list = [] for block in anno.split(" "): diff --git a/lusSTR/repeat.py b/lusSTR/scripts/repeat.py similarity index 85% rename from lusSTR/repeat.py rename to lusSTR/scripts/repeat.py index c89655f6..7175e56b 100644 --- a/lusSTR/repeat.py +++ b/lusSTR/scripts/repeat.py @@ -13,6 +13,22 @@ import re +def split_sequence_into_two_strings(sequence, repeat_for_split): + """ + Function to split a sequence into two separate strings at a specified repeat unit. + """ + last = 0 + prev = 0 + for m in re.finditer(repeat_for_split, sequence): + if m.start() == prev or m.start() == last or prev == 0: + prev = m.end() + else: + last = m.end() + first_string = sequence[:prev] + second_string = sequence[prev:] + return first_string, second_string + + def collapse_tandem_repeat(fullseq, repeat): """Collapse tandem stretches of the specified repeat sequence in a larger sequence. 
@@ -78,7 +94,7 @@ def get_blocks(sequence, n, rev=False): def collapse_repeats_by_length(sequence, n): - """Convert to bracketed annotation form by splitting the sequence into blocks of size n.""" + """Convert to bracketed sequence form by splitting the sequence into blocks of size n.""" units = list() for unit, count in get_blocks(sequence, n, False): assert unit is not None, (sequence, n) @@ -92,7 +108,7 @@ def collapse_repeats_by_length(sequence, n): def sequence_to_bracketed_form(sequence, n, repeats): - """Convert sequence to bracketed annotation. + """Convert sequence to bracketed sequence form. Uses a combination of repeat-based and length-based methods to convert a sequence containing tandem repeats into a concise bracketed representation. @@ -124,7 +140,7 @@ def reverse_complement(sequence): def reverse_complement_bracketed(forward_bracket): - """Compute reverse complement of a bracketed form annotation.""" + """Compute reverse complement of a bracketed sequence form.""" inblocks = forward_bracket.split(" ") outblocks = list() for block in reversed(inblocks): @@ -135,7 +151,7 @@ def reverse_complement_bracketed(forward_bracket): rcblock = f"[{rcrep}]{count}" else: if re.match(r"[^ACGT]", block): - raise ValueError(f'annotation block "{block}" includes invalid characters') + raise ValueError(f'bracketed block "{block}" includes invalid characters') rcblock = reverse_complement(block) outblocks.append(rcblock) return " ".join(outblocks) @@ -144,7 +160,7 @@ def reverse_complement_bracketed(forward_bracket): def repeat_copy_number(bf, repeat): """Determine the longest uninterrupted stretch of the specified repeat. - The input is a sequence string collapsed to bracketed annotation form. + The input is a sequence string collapsed to bracketed sequence form. 
""" longest = 0 for block in bf.split(" "): @@ -160,7 +176,7 @@ def repeat_copy_number(bf, repeat): def collapse_repeats_by_length_flanks(sequence, n): - """Convert to bracketed annotation form by splitting the sequence into blocks of size n.""" + """Convert to bracketed sequence form by splitting the sequence into blocks of size n.""" units = list() for unit, count in get_blocks(sequence, n, True): assert unit is not None, (sequence, n) diff --git a/lusSTR/snps.py b/lusSTR/snps.py deleted file mode 100644 index 40ed609c..00000000 --- a/lusSTR/snps.py +++ /dev/null @@ -1,395 +0,0 @@ -# ------------------------------------------------------------------------------------------------- -# Copyright (c) 2020, DHS. -# -# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under -# the BSD license: see LICENSE.txt. -# -# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National -# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the -# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and -# Development Center. 
-# ------------------------------------------------------------------------------------------------- - -import glob -import json -import lusSTR -import pandas as pd -import openpyxl -import os -from pkg_resources import resource_filename - - -def get_snp_metadata_file(): - return resource_filename("lusSTR", "snp_data.json") - - -with open(get_snp_metadata_file(), "r") as fh: - snp_marker_data = json.load(fh) - - -snp_type_dict = {"a": "Ancestry", "i": "Identity", "p": "Phenotype", "p/a": "Phenotype;Ancestry"} - - -snps_within_loci = { - "mh16-MC1RB": {"SNPs": ["rs1805005", "rs1805006", "rs2228479"]}, - "mh16-MC1RC": { - "SNPs": [ - "rs11547464", - "rs1805007", - "rs201326893_Y152OCH", - "rs1110400", - "rs1805008", - "rs885479", - ] - }, -} - - -def complement_base(base): - complement = {"A": "T", "C": "G", "G": "C", "T": "A"} - comp_base = complement[base] - return comp_base - - -def uas_format(infile, snp_type_arg): - """ - This function begins with the compiled data from all files within the specified directory. - It removes any allele with Reads of 0; identifies whether the allele call needs to be reverse - complemented to be reported on the forward strand; and checks that the called allele is one of - two expected alleles for the SNP (and flags any SNP call which is unexpected). - """ - data_filt = uas_load(infile, snp_type_arg).reset_index(drop=True) - data_df = [] - for j, row in data_filt.iterrows(): - snpid = data_filt.iloc[j, 0] - metadata = snp_marker_data[snpid] - type = metadata["Type"] - uas_allele = data_filt.iloc[j, 2] - if metadata["ReverseCompNeeded"] == "Yes": - forward_strand_allele = complement_base(uas_allele) - else: - forward_strand_allele = uas_allele - if data_filt.loc[j, "Typed Allele?"] == "No": - flag = "Contains untyped allele" - elif forward_strand_allele in metadata["Alleles"]: - flag = "" - else: - flag = "Allele call does not match expected allele!" 
- row_tmp = [ - data_filt.loc[j, "SampleID"], - data_filt.loc[j, "Project"], - data_filt.loc[j, "Analysis"], - snpid, - data_filt.loc[j, "Reads"], - forward_strand_allele, - uas_allele, - snp_type_dict[type], - flag, - ] - data_df.append(row_tmp) - data_final = pd.DataFrame( - data_df, - columns=[ - "SampleID", - "Project", - "Analysis", - "SNP", - "Reads", - "Forward_Strand_Allele", - "UAS_Allele", - "Type", - "Issues", - ], - ) - data_final_sort = data_final.sort_values( - by=["SampleID", "Project", "Analysis", "SNP", "Reads"], ascending=False - ) - return data_final_sort - - -def uas_load(indir, type="i"): - """ - This function lists input .xlsx files within the specified directory and performs a check to - ensure the correct file is processed (must contain either "Phenotype" or "Sample Details"). - This also compiles the SNP data for each file within the directory. - """ - snp_final_output = pd.DataFrame() - files = glob.glob(os.path.join(indir, "[!~]*.xlsx")) - for filename in sorted(files): - if "Phenotype" in filename or "Sample Details" in filename: - snps = uas_types(filename, type) - if snps is not None: - snp_final_output = snp_final_output.append(snps) - else: - continue - return snp_final_output - - -def uas_types(infile, snp_type_arg): - """ - This function determines which tab within the specified file is required to extract the SNP - data from based on the name of the file. - """ - if "Sample Details" in infile and (snp_type_arg == "all" or snp_type_arg == "i"): - snp_data = parse_snp_table_from_sheet(infile, "iSNPs", snp_type_arg) - elif "Phenotype" in infile and (snp_type_arg == "all" or snp_type_arg == "p"): - snp_data = parse_snp_table_from_sheet(infile, "SNP Data", snp_type_arg) - else: - snp_data = None - return snp_data - - -def parse_snp_table_from_sheet(infile, sheet, snp_type_arg): - """ - This function formats the SNP data from the original file and filters the SNPs based on the - indicated SNP type. 
- """ - file = openpyxl.load_workbook(infile) - file_sheet = file[sheet] - table = pd.DataFrame(file_sheet.values) - offset = table[table.iloc[:, 0] == "Coverage Information"].index.tolist()[0] - data = table.iloc[offset + 2 :] - data.columns = table.iloc[offset + 1] - data = data[["Locus", "Reads", "Allele Name", "Typed Allele?"]] - final_df = pd.DataFrame() - if snp_type_arg == "all": - final_df = data - elif snp_type_arg == "i": - filtered_dict = {k: v for k, v in snp_marker_data.items() if "i" in v["Type"]} - filtered_data = data[data["Locus"].isin(filtered_dict)].reset_index(drop=True) - final_df = final_df.append(filtered_data) - else: - filtered_dict = {k: v for k, v in snp_marker_data.items() if "i" not in v["Type"]} - filtered_data = data[data["Locus"].isin(filtered_dict)].reset_index(drop=True) - final_df = final_df.append(filtered_data) - final_df["SampleID"] = table.iloc[2, 1] - final_df["Project"] = table.iloc[3, 1] - final_df["Analysis"] = table.iloc[4, 1] - return final_df - - -def strait_razor_format(infile, snp_type_arg): - """ - This function formats STRait Razor input data for two separate reports. The full output - includes all reads, the SNP allele calls and any results flags. In the main report, the reads - are summed for identical allele calls per SNP. This function also checks that the allele call - is one of two expected alleles for the SNP (and flags the allele if not). 
- """ - results = strait_razor_concat(infile, snp_type_arg) - results_sort = results.sort_values( - by=["SampleID", "Project", "Analysis", "SNP", "Reads"], ascending=False - ) - results_combine = results_sort.groupby( - ["SNP", "Forward_Strand_Allele", "UAS_Allele", "Type", "SampleID", "Project", "Analysis"], - as_index=False, - )["Reads"].sum() - results_combine = results_combine[ - [ - "SampleID", - "Project", - "Analysis", - "SNP", - "Reads", - "Forward_Strand_Allele", - "UAS_Allele", - "Type", - ] - ] - results_combine["Issues"] = "" - for j, row in results_combine.iterrows(): - snpid = results_combine.iloc[j, 3] - metadata = snp_marker_data[snpid] - if results_combine.iloc[j, 5] not in metadata["Alleles"]: - results_combine.iloc[j, 8] = "Allele call does not match expected allele!" - results_combine_sort = results_combine.sort_values( - by=["SampleID", "Project", "Analysis", "SNP", "Reads"], ascending=False - ) - return results_sort, results_combine_sort - - -def strait_razor_concat(indir, snp_type_arg): - """ - This function reads in all .txt files within the specified directory. For each file, the - forward and reverse reads are summed and each sequence is processed and compiled into one - final dataframe. - """ - snps = pd.DataFrame() - analysisID = os.path.basename(indir.rstrip(os.sep)) - files = glob.glob(os.path.join(indir, "[!~]*.txt")) - for filename in sorted(files): - name = filename.replace(".txt", "").split(os.sep)[-1] - table = pd.read_csv( - filename, - sep="\t", - header=None, - names=["Locus_allele", "Length", "Sequence", "Forward_Reads", "Reverse_Reads"], - ) - try: - table[["SNP", "Bases_off"]] = table.Locus_allele.str.split(":", expand=True) - except ValueError: - print( - f"Error found with {filename}. Will bypass and continue. Please check file" - f" and rerun the command, if necessary." 
- ) - continue - table["Total_Reads"] = table["Forward_Reads"] + table["Reverse_Reads"] - snps_only = pd.DataFrame(table[table["SNP"].str.contains("rs|mh16|insA")]).reset_index( - drop=True - ) - for j, row in snps_only.iterrows(): - snpid = snps_only.iloc[j, 5] - try: - row = compile_row_of_snp_data(snps_only, snpid, j, snp_type_arg, name, analysisID) - except KeyError: - continue - if row is not None: - snps = snps.append(row) - snps.columns = [ - "SampleID", - "Project", - "Analysis", - "SNP", - "Sequence", - "Reads", - "Forward_Strand_Allele", - "UAS_Allele", - "Type", - "Potential_Issues", - ] - return snps - - -def compile_row_of_snp_data(infile, snp, table_loc, type, name, analysis): - """ - This function is necessary to account for the two sets of SNPs reported from the same - sequence amplicon. Sequences labeled as mh16-MC1RB and mh16-MC1RC contain 3 and 6 SNPs, - respectively. This function reports out each SNP from the sequence amplicon as individual - rows and calls another function to compile data on each SNP. - """ - snp_df = [] - if "mh16" in snp: - locus_data = snps_within_loci[snp] - for k in range(0, len(locus_data["SNPs"])): - snp_id = locus_data["SNPs"][k] - row_tmp = collect_snp_info(infile, snp_id, table_loc, type, name, analysis) - if row_tmp is not None: - snp_df.append(row_tmp) - else: - row_tmp = collect_snp_info(infile, snp, table_loc, type, name, analysis) - if row_tmp is not None: - snp_df.append(row_tmp) - final_snp_df = pd.DataFrame(snp_df) - return final_snp_df - - -def collect_snp_info(infile, snpid, j, type, name, analysis): - """ - This function compiles allele calls, reads, reverse complements allele call if necessary to - match how the UAS reports the allele, and any flags associated with the allele call. The flags - indicate potential issues, including an unexpected allele call (not one of two expected - alleles for the SNP) or unexpected length of the sequence amplicon which could result in an - incorrect allele call. 
This function also determines if the SNP should be included in the - final table based on the specified SNP type from the CLI. - """ - if snpid == "N29insA": - snpid = "rs312262906_N29insA" - metadata = snp_marker_data[snpid] - snp_type = metadata["Type"] - seq = infile.iloc[j, 2] - expected_alleles = metadata["Alleles"] - snp_loc = metadata["Coord"] - if len(seq) > snp_loc: - snp_call = seq[snp_loc] - if snpid == "rs312262906_N29insA" and snp_call == "A": - snp_call = "insA" - if metadata["ReverseCompNeeded"] == "Yes": - snp_call_uas = complement_base(snp_call) - else: - snp_call_uas = snp_call - if snpid == "rs2402130": - differ_length = len(seq) - 73 - else: - differ_length = int(infile.iloc[j, 6]) - if snp_call not in expected_alleles and differ_length != 0: - if snpid == "rs1821380": - snp_call, allele_flag = snp_call_exception(seq, differ_length, metadata, snp_call) - snp_call_uas = complement_base(snp_call) - else: - allele_flag = ( - "Allele call does not match expected allele! Check for indels " - "(does not match expected sequence length)" - ) - elif snp_call not in expected_alleles: - allele_flag = "Allele call does not match expected allele!" - elif differ_length != 0: - allele_flag = "Check for indels (does not match expected sequence length)" - else: - allele_flag = "" - if ( - (type == "p" and (snp_type == "p" or snp_type == "a" or snp_type == "p/a")) - or (type == "i" and snp_type == "i") - or (type == "all") - ): - row_tmp = [ - name, - analysis, - analysis, - snpid, - seq, - infile.iloc[j, 7], - snp_call, - snp_call_uas, - snp_type_dict[snp_type], - allele_flag, - ] - else: - row_tmp = None - else: - row_tmp = None - return row_tmp - - -def snp_call_exception(seq, expected_size, metadata, base): - """ - This function accounts for insertions and deletions in sequences to identify the correct base - coordinate for the SNP. If the identified allele is still not one of the expected alleles, the - sequence will be flagged appropriately. 
- """ - new_size = len(seq) + expected_size - new_base_call = seq[new_size] - if new_base_call in metadata["Alleles"]: - flag = ( - "Sequence length different than expected (check for indels); allele position adjusted" - ) - return new_base_call, flag - else: - flag = ( - "Allele call does not match expected allele! Check for indels " - "(does not match expected sequence length)" - ) - return base, flag - - -def indiv_files(table, input_dir, ext): - output_dir = f"Separated_lusstr_Files/{input_dir}" - os.makedirs(output_dir, exist_ok=True) - for samp in table["SampleID"].unique(): - new_df = table[table["SampleID"] == samp] - new_df.to_csv(f"{output_dir}/{samp}{ext}", sep="\t", index=False) - - -def main(args): - output_name = os.path.splitext(args.out)[0] - if args.uas: - results = uas_format(args.input, args.type) - if args.separate: - indiv_files(results, output_name, ".txt") - else: - results.to_csv(args.out, index=False, sep="\t") - else: - results, results_combined = strait_razor_format(args.input, args.type) - if args.separate: - indiv_files(results_combined, output_name, ".txt") - else: - results_combined.to_csv(args.out, index=False, sep="\t") - results.to_csv(f"{output_name}_full_output.txt", index=False, sep="\t") diff --git a/lusSTR/tests/data/2800M_full_anno.txt b/lusSTR/tests/data/2800M_full_convert.txt similarity index 100% rename from lusSTR/tests/data/2800M_full_anno.txt rename to lusSTR/tests/data/2800M_full_convert.txt diff --git a/lusSTR/tests/data/2800M_full_anno_no_combined_reads.txt b/lusSTR/tests/data/2800M_full_convert_no_combined_reads.txt similarity index 100% rename from lusSTR/tests/data/2800M_full_anno_no_combined_reads.txt rename to lusSTR/tests/data/2800M_full_convert_no_combined_reads.txt diff --git a/lusSTR/tests/data/2800M_uas_anno.txt b/lusSTR/tests/data/2800M_uas_convert.txt similarity index 100% rename from lusSTR/tests/data/2800M_uas_anno.txt rename to lusSTR/tests/data/2800M_uas_convert.txt diff --git 
a/lusSTR/tests/data/Flanks_testing_file.csv b/lusSTR/tests/data/Flanks_testing_file.csv index ae1627d6..a0b13a6e 100644 --- a/lusSTR/tests/data/Flanks_testing_file.csv +++ b/lusSTR/tests/data/Flanks_testing_file.csv @@ -1,1095 +1,1095 @@ -Locus,Reads,ForenSeq,Sample,Analysis,ProjectentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCTCAA,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCTCAA,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCTCAA,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACACAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GACCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 
-PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGGAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAA,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAA,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 
-PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 
-PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAACGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGATGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 
-PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 
-PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 
-PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTCCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCCTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCCTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 
-PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 
-TH01,100,TGCAGGTCACAGGGAACAGAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAGTGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGTGGGAAATAAGG,A01,A1,B1 -TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTATGTTTGG,A01,A1,B1 
-TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAAACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGCTAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATGGATGGATAGATAGATAGATGGATAGATAGATAGACAGACAGACAGACAGATAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGACAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGGTAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGAGAGATCAAT,A01,A1,B1 
-vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATGGATGGATAGATAGATAGATGGATAGATAGATAGACAGACAGACAGACAGATAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATGGATGGATGGATAGATAGATAGATGGATAGATAGATAGACAGACAGACAGACAGATAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 
-vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGTTCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 
-vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 -vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +Locus,Reads,ForenSeq,SampleID,Analysis,ProjectentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCTCAA,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCTCAA,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCTCAA,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACACAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GACCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 
+PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGGAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAA,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAA,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 
+PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 
+PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAACGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGATGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 
+PentaD,100,GATCACTTGAGCCTGGAAGGCCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaD,100,GATCACTTGAGCCTGGAAGGTCGAAGCTGAAGTGAGCCATGATCACACCACTACACTCCAGCCTAGGTGACAGAGCAAGACACCATCTCAAGAAAGAAAAAAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAAACGAAGGGGAAAAAAAGAGAATCATAAACATAAATGTAAAATTTCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 
+PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 
+PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTCCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCCTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCCTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 
+PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +PentaE,100,AGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGACTGAGTCTTGCTCAGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCACTTCAATCTCCACCTCCT,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 
+TH01,100,TGCAGGTCACAGGGAACAGAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAGTGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGTGGGAAATAAGG,A01,A1,B1 +TH01,100,TGCAGGTCACAGGGAACACAGACTCCATGGTGAATGAATGAATGAATGAATGAATGAATGATGAATGAATGAATGAGGGAAATAAGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTATGTTTGG,A01,A1,B1 
+TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +TPOX,100,TGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATGTTTGG,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAAACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGCTAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATGGATGGATAGATAGATAGATGGATAGATAGATAGACAGACAGACAGACAGATAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGACAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGGTAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGAGAGATCAAT,A01,A1,B1 
+vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATGGATGGATAGATAGATAGATGGATAGATAGATAGACAGACAGACAGACAGATAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATGGATGGATGGATAGATAGATAGATGGATAGATAGATAGACAGACAGACAGACAGATAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 
+vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGTTCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 
+vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 +vWA,100,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A01,A1,B1 \ No newline at end of file diff --git a/lusSTR/tests/data/lusstr_output.csv b/lusSTR/tests/data/lusstr_output.csv new file mode 100644 index 00000000..a53ea100 --- /dev/null +++ b/lusSTR/tests/data/lusstr_output.csv @@ -0,0 +1,129 @@ +Locus,Reads,Repeat Sequence,SampleID,Project,Analysis +D1S1656,33,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,Positive Control,Project1,Analysis1 +D1S1656,13,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTGTGTG,Positive Control,Project1,Analysis1 +D1S1656,231,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,Positive Control,Project1,Analysis1 +D1S1656,162,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTGTGTG,Positive Control,Project1,Analysis1 +TPOX,59,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,Positive Control,Project1,Analysis1 +TPOX,1216,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,Positive Control,Project1,Analysis1 +D2S441,44,TCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D2S441,1781,TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D2S441,33,TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATTTATCTATCTA,Positive Control,Project1,Analysis1 +D2S441,1330,TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATTTATCTATCTA,Positive Control,Project1,Analysis1 +D2S1338,16,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D2S1338,37,TGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D2S1338,246,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 
+D2S1338,2165,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D2S1338,19,TGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D2S1338,15,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCTTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D2S1338,23,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D2S1338,259,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D2S1338,21,TGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D2S1338,1656,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D2S1338,15,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTCCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D2S1338,15,TGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D3S1358,12,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D3S1358,135,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D3S1358,1597,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D3S1358,1855,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D3S1358,13,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D4S2408,38,ATCTATCTATCTATCTATCTATCTATCTATCT,Positive Control,Project1,Analysis1 
+D4S2408,1357,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,Positive Control,Project1,Analysis1 +D4S2408,11,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,Positive Control,Project1,Analysis1 +FGA,135,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,Positive Control,Project1,Analysis1 +FGA,1043,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,Positive Control,Project1,Analysis1 +FGA,21,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,Positive Control,Project1,Analysis1 +FGA,182,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,Positive Control,Project1,Analysis1 +FGA,1023,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,Positive Control,Project1,Analysis1 +FGA,12,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,Positive Control,Project1,Analysis1 +D5S818,21,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAG,Positive Control,Project1,Analysis1 +D5S818,382,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAG,Positive Control,Project1,Analysis1 +CSF1PO,29,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +CSF1PO,702,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +CSF1PO,11,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D6S1043,219,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D6S1043,2088,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D6S1043,138,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D6S1043,1487,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 
+D6S1043,17,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D7S820,13,GATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,Positive Control,Project1,Analysis1 +D7S820,517,GATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,Positive Control,Project1,Analysis1 +D7S820,33,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,Positive Control,Project1,Analysis1 +D7S820,522,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,Positive Control,Project1,Analysis1 +D8S1179,13,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D8S1179,135,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D8S1179,74,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D8S1179,971,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D8S1179,661,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D9S1122,115,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,Positive Control,Project1,Analysis1 +D9S1122,131,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,Positive Control,Project1,Analysis1 +D9S1122,1551,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,Positive Control,Project1,Analysis1 +D9S1122,1427,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,Positive Control,Project1,Analysis1 +D10S1248,163,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,Positive Control,Project1,Analysis1 +D10S1248,1050,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,Positive Control,Project1,Analysis1 +D10S1248,116,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,Positive Control,Project1,Analysis1 +D10S1248,1022,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,Positive Control,Project1,Analysis1 +TH01,247,AATGAATGAATGAATGAATG,Positive Control,Project1,Analysis1 +TH01,4542,AATGAATGAATGAATGAATGAATG,Positive 
Control,Project1,Analysis1 +TH01,151,AATGAATGAATGAATGAATGATGAATGAATGAATG,Positive Control,Project1,Analysis1 +TH01,3581,AATGAATGAATGAATGAATGAATGATGAATGAATGAATG,Positive Control,Project1,Analysis1 +vWA,23,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,Positive Control,Project1,Analysis1 +vWA,377,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,Positive Control,Project1,Analysis1 +vWA,43,TCTATCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,Positive Control,Project1,Analysis1 +vWA,325,TCTATCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,Positive Control,Project1,Analysis1 +D12S391,11,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGAT,Positive Control,Project1,Analysis1 +D12S391,13,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGAT,Positive Control,Project1,Analysis1 +D12S391,142,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGAT,Positive Control,Project1,Analysis1 +D12S391,11,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGAT,Positive Control,Project1,Analysis1 +D12S391,974,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGAT,Positive Control,Project1,Analysis1 +D12S391,12,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGACAGAC,Positive Control,Project1,Analysis1 +D12S391,137,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGACAGAC,Positive Control,Project1,Analysis1 +D12S391,23,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGAC,Positive Control,Project1,Analysis1 +D12S391,15,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGAC,Positive Control,Project1,Analysis1 +D12S391,751,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGACAGAC,Positive Control,Project1,Analysis1 
+D13S317,17,TATCTATCTATCTATCTATCTATCTATCTATCAATCAATCATCTATCTATCTTTCTGTCTGTC,Positive Control,Project1,Analysis1 +D13S317,958,TATCTATCTATCTATCTATCTATCTATCTATCTATCAATCAATCATCTATCTATCTTTCTGTCTGTC,Positive Control,Project1,Analysis1 +D13S317,34,TATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCATCTATCTATCTTTCTGTCTGTC,Positive Control,Project1,Analysis1 +D13S317,831,TATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCATCTATCTATCTTTCTGTCTGTC,Positive Control,Project1,Analysis1 +PentaE,505,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,Positive Control,Project1,Analysis1 +PentaE,14,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,Positive Control,Project1,Analysis1 +PentaE,261,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,Positive Control,Project1,Analysis1 +D16S539,69,GATAGATAGATAGATAGATAGATAGATAGATA,Positive Control,Project1,Analysis1 +D16S539,1019,GATAGATAGATAGATAGATAGATAGATAGATAGATA,Positive Control,Project1,Analysis1 +D16S539,13,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA,Positive Control,Project1,Analysis1 +D16S539,135,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA,Positive Control,Project1,Analysis1 +D16S539,996,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA,Positive Control,Project1,Analysis1 +D17S1301,91,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D17S1301,528,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D17S1301,599,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D18S51,124,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,Positive Control,Project1,Analysis1 +D18S51,1009,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,Positive Control,Project1,Analysis1 +D18S51,165,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,Positive Control,Project1,Analysis1 
+D18S51,999,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,Positive Control,Project1,Analysis1 +D19S433,122,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,Positive Control,Project1,Analysis1 +D19S433,28,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,Positive Control,Project1,Analysis1 +D19S433,16,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAAGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,Positive Control,Project1,Analysis1 +D19S433,1782,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,Positive Control,Project1,Analysis1 +D19S433,17,AAGGAAAGAAGGTAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,Positive Control,Project1,Analysis1 +D19S433,35,AAGGAAAGAAGGTAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,Positive Control,Project1,Analysis1 +D19S433,26,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,Positive Control,Project1,Analysis1 +D19S433,20,AAGGAAAGAATGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,Positive Control,Project1,Analysis1 +D19S433,17,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,Positive Control,Project1,Analysis1 +D19S433,16,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAAGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,Positive Control,Project1,Analysis1 +D19S433,1621,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,Positive Control,Project1,Analysis1 +D20S482,13,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D20S482,337,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D20S482,3136,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D20S482,2731,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 
+D20S482,46,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,Positive Control,Project1,Analysis1 +D21S11,84,TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D21S11,1144,TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,Positive Control,Project1,Analysis1 +D21S11,34,TCTATCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA,Positive Control,Project1,Analysis1 +D21S11,14,TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA,Positive Control,Project1,Analysis1 +D21S11,1064,TCTATCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA,Positive Control,Project1,Analysis1 +D21S11,15,TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA,Positive Control,Project1,Analysis1 +PentaD,289,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,Positive Control,Project1,Analysis1 +PentaD,278,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,Positive Control,Project1,Analysis1 +D22S1045,13,ATTATTATTATTATTATTATTATTATTATTATTACTATTATT,Positive Control,Project1,Analysis1 +D22S1045,146,ATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,Positive Control,Project1,Analysis1 +D22S1045,1746,ATTATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,Positive Control,Project1,Analysis1 +D22S1045,27,ATTATTATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,Positive Control,Project1,Analysis1 diff --git a/lusSTR/tests/data/lusstr_output.txt b/lusSTR/tests/data/lusstr_output.txt new file mode 100644 index 00000000..d86c37dd --- /dev/null +++ 
b/lusSTR/tests/data/lusstr_output.txt @@ -0,0 +1,129 @@ +SampleID Project Analysis Locus UAS_Output_Sequence Forward_Strand_Sequence UAS_Output_Bracketed_Notation Forward_Strand_Bracketed_Notation CE_Allele LUS LUS_Plus Reads +Positive_Control Project1 Analysis1 VWA TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGA TCTA [TCTG]3 [TCTA]12 TCCA TCTA TAGA TGGA [TAGA]12 [CAGA]3 TAGA 16 16_12 16_12_3_1 377 +Positive_Control Project1 Analysis1 VWA TCTATCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGA TCTA [TCTG]4 [TCTA]14 TCCA TCTA TAGA TGGA [TAGA]14 [CAGA]4 TAGA 19 19_14 19_14_4_1 325 +Positive_Control Project1 Analysis1 VWA TCTATCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGA TCTA [TCTG]4 [TCTA]13 TCCA TCTA TAGA TGGA [TAGA]13 [CAGA]4 TAGA 18 18_13 18_13_4_1 43 +Positive_Control Project1 Analysis1 VWA TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGA TCTA [TCTG]3 [TCTA]11 TCCA TCTA TAGA TGGA [TAGA]11 [CAGA]3 TAGA 15 15_11 15_11_3_1 23 +Positive_Control Project1 Analysis1 TPOX AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG [AATG]11 [AATG]11 11 11_11 11_11 1216 +Positive_Control Project1 Analysis1 TPOX AATGAATGAATGAATGAATGAATGAATGAATGAATGAATG AATGAATGAATGAATGAATGAATGAATGAATGAATGAATG [AATG]10 [AATG]10 10 10_10 10_10 59 +Positive_Control Project1 Analysis1 TH01 AATGAATGAATGAATGAATGAATG AATGAATGAATGAATGAATGAATG [AATG]6 [AATG]6 6 6_6 6_6 4542 +Positive_Control Project1 Analysis1 TH01 AATGAATGAATGAATGAATGAATGATGAATGAATGAATG AATGAATGAATGAATGAATGAATGATGAATGAATGAATG [AATG]6 ATG [AATG]3 [AATG]6 ATG [AATG]3 9.3 9.3_6 9.3_6 3581 
+Positive_Control Project1 Analysis1 TH01 AATGAATGAATGAATGAATG AATGAATGAATGAATGAATG [AATG]5 [AATG]5 5 5_5 5_5 247 +Positive_Control Project1 Analysis1 TH01 AATGAATGAATGAATGAATGATGAATGAATGAATG AATGAATGAATGAATGAATGATGAATGAATGAATG [AATG]5 ATG [AATG]3 [AATG]5 ATG [AATG]3 8.3 8.3_5 8.3_5 151 +Positive_Control Project1 Analysis1 PENTA E AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA TCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTT [AAAGA]7 [TCTTT]7 7 7_7 7_7 505 +Positive_Control Project1 Analysis1 PENTA E AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA TCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTT [AAAGA]14 [TCTTT]14 14 14_14 14_14 261 +Positive_Control Project1 Analysis1 PENTA E AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA TCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTT [AAAGA]13 [TCTTT]13 13 13_13 13_13 14 +Positive_Control Project1 Analysis1 PENTA D AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA AAAAG [AAAGA]12 AAAAG [AAAGA]12 12 12_12 12_12 289 +Positive_Control Project1 Analysis1 PENTA D AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA AAAAG [AAAGA]13 AAAAG [AAAGA]13 13 13_13 13_13 278 +Positive_Control Project1 Analysis1 FGA TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA [TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2 [GGAA]2 GGAG [AAAG]12 AGAA AAAA [GAAA]3 20 20_12 20_12_3_0 1043 +Positive_Control Project1 Analysis1 FGA TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA [TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2 [GGAA]2 GGAG [AAAG]15 AGAA AAAA [GAAA]3 23 23_15 23_15_3_0 1023 +Positive_Control 
Project1 Analysis1 FGA TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA [TTTC]3 TTTT TTCT [CTTT]14 CTCC [TTCC]2 [GGAA]2 GGAG [AAAG]14 AGAA AAAA [GAAA]3 22 22_14 22_14_3_0 182 +Positive_Control Project1 Analysis1 FGA TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA [TTTC]3 TTTT TTCT [CTTT]11 CTCC [TTCC]2 [GGAA]2 GGAG [AAAG]11 AGAA AAAA [GAAA]3 19 19_11 19_11_3_0 135 +Positive_Control Project1 Analysis1 FGA TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA [TTTC]3 TTTT TTCT [CTTT]13 CTCC [TTCC]2 [GGAA]2 GGAG [AAAG]13 AGAA AAAA [GAAA]3 21 21_13 21_13_3_0 21 +Positive_Control Project1 Analysis1 FGA TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA [TTTC]3 TTTT TTCT [CTTT]16 CTCC [TTCC]2 [GGAA]2 GGAG [AAAG]16 AGAA AAAA [GAAA]3 24 24_16 24_16_3_0 12 +Positive_Control Project1 Analysis1 D9S1122 TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA TAGA TCGA [TAGA]10 TAGA TCGA [TAGA]10 12 12_10 12_10 1551 +Positive_Control Project1 Analysis1 D9S1122 TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA [TAGA]12 [TAGA]12 12 12_12 12_12 1427 +Positive_Control Project1 Analysis1 D9S1122 TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA [TAGA]11 [TAGA]11 11 11_11 11_11 131 +Positive_Control Project1 Analysis1 D9S1122 TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGA TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGA TAGA TCGA [TAGA]9 TAGA TCGA [TAGA]9 11 11_9 11_9 115 
+Positive_Control Project1 Analysis1 D8S1179 TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTA TCTG [TCTA]12 TCTA TCTG [TCTA]12 14 14_12 14_12_1_0 971 +Positive_Control Project1 Analysis1 D8S1179 TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA [TCTA]2 TCTG [TCTA]12 [TCTA]2 TCTG [TCTA]12 15 15_12 15_12_1_0 661 +Positive_Control Project1 Analysis1 D8S1179 TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTA TCTG [TCTA]11 TCTA TCTG [TCTA]11 13 13_11 13_11_1_0 135 +Positive_Control Project1 Analysis1 D8S1179 TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA [TCTA]2 TCTG [TCTA]11 [TCTA]2 TCTG [TCTA]11 14 14_11 14_11_1_0 74 +Positive_Control Project1 Analysis1 D8S1179 TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTA TCTG [TCTA]10 TCTA TCTG [TCTA]10 12 12_10 12_10_1_0 13 +Positive_Control Project1 Analysis1 D7S820 GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT AAACTATCAATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATC [GATA]11 GACA GATT GATA GTTT AAAC TATC AATC TGTC [TATC]11 11 11_11 11_11_1_0 522 +Positive_Control Project1 Analysis1 D7S820 GATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT AAACTATCAATCTGTCTATCTATCTATCTATCTATCTATCTATCTATC [GATA]8 GACA GATT GATA GTTT AAAC TATC AATC TGTC [TATC]8 8 8_8 8_8_1_0 517 +Positive_Control Project1 Analysis1 D7S820 GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT AAACTATCAATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATC [GATA]10 GACA GATT GATA GTTT AAAC TATC AATC TGTC [TATC]10 10 10_10 10_10_1_0 33 +Positive_Control Project1 Analysis1 D7S820 GATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT AAACTATCAATCTGTCTATCTATCTATCTATCTATCTATCTATC [GATA]7 GACA GATT GATA GTTT AAAC TATC AATC TGTC [TATC]7 7 7_7 7_7_1_0 
13 +Positive_Control Project1 Analysis1 D6S1043 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT [AGAT]12 [ATCT]12 12 12_12 12_12_0 2088 +Positive_Control Project1 Analysis1 D6S1043 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT ATCTATCTATCTATCTATCTATGTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT [AGAT]14 ACAT [AGAT]5 [ATCT]5 ATGT [ATCT]14 20 20_14 20_14_1 1487 +Positive_Control Project1 Analysis1 D6S1043 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT [AGAT]11 [ATCT]11 11 11_11 11_11_0 219 +Positive_Control Project1 Analysis1 D6S1043 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT ATCTATCTATCTATCTATCTATGTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT [AGAT]13 ACAT [AGAT]5 [ATCT]5 ATGT [ATCT]13 19 19_13 19_13_1 138 +Positive_Control Project1 Analysis1 D6S1043 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT ATCTATCTATCTATCTATCTATGTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT [AGAT]15 ACAT [AGAT]5 [ATCT]5 ATGT [ATCT]15 21 21_15 21_15_1 17 +Positive_Control Project1 Analysis1 D5S818 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAG CTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT [AGAT]12 AGAG CTCT [ATCT]12 12 12_12 12_12 382 +Positive_Control Project1 Analysis1 D5S818 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAG CTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT [AGAT]11 AGAG CTCT [ATCT]11 11 11_11 11_11 21 +Positive_Control Project1 Analysis1 D4S2408 ATCTATCTATCTATCTATCTATCTATCTATCTATCT ATCTATCTATCTATCTATCTATCTATCTATCTATCT [ATCT]9 [ATCT]9 9 9_9 9_9_0 1357 +Positive_Control Project1 Analysis1 D4S2408 ATCTATCTATCTATCTATCTATCTATCTATCT ATCTATCTATCTATCTATCTATCTATCTATCT [ATCT]8 [ATCT]8 8 8_8 8_8_0 38 +Positive_Control Project1 Analysis1 D4S2408 ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT [ATCT]10 [ATCT]10 
10 10_10 10_10_0 11 +Positive_Control Project1 Analysis1 D3S1358 TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTA [TCTG]3 [TCTA]14 TCTA [TCTG]3 [TCTA]14 18 18_14 18_14_3 1855 +Positive_Control Project1 Analysis1 D3S1358 TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTA [TCTG]3 [TCTA]13 TCTA [TCTG]3 [TCTA]13 17 17_13 17_13_3 1597 +Positive_Control Project1 Analysis1 D3S1358 TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTA [TCTG]3 [TCTA]12 TCTA [TCTG]3 [TCTA]12 16 16_12 16_12_3 135 +Positive_Control Project1 Analysis1 D3S1358 TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTA [TCTG]3 [TCTA]15 TCTA [TCTG]3 [TCTA]15 19 19_15 19_15_3 13 +Positive_Control Project1 Analysis1 D3S1358 TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTA [TCTG]3 [TCTA]11 TCTA [TCTG]3 [TCTA]11 15 15_11 15_11_3 12 +Positive_Control Project1 Analysis1 D2S441 TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA [TCTA]10 [TCTA]10 10 10_10 10_10_0 1781 +Positive_Control Project1 Analysis1 D2S441 TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATTTATCTATCTA TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATTTATCTATCTA [TCTA]11 TTTA [TCTA]2 [TCTA]11 TTTA [TCTA]2 14 14_11 14_11_0 1330 +Positive_Control Project1 Analysis1 D2S441 TCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTATCTATCTATCTATCTATCTATCTATCTA [TCTA]9 [TCTA]9 9 9_9 9_9_0 44 +Positive_Control Project1 Analysis1 D2S441 TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATTTATCTATCTA TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATTTATCTATCTA [TCTA]10 TTTA [TCTA]2 [TCTA]10 TTTA [TCTA]2 13 
13_10 13_10_0 33 +Positive_Control Project1 Analysis1 D2S1338 TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]7 [TTCC]12 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]12 [GGCA]7 22 22_12 22_12_1_7 2165 +Positive_Control Project1 Analysis1 D2S1338 TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]7 [TTCC]15 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]15 [GGCA]7 25 25_15 25_15_1_7 1656 +Positive_Control Project1 Analysis1 D2S1338 TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]7 [TTCC]14 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]14 [GGCA]7 24 24_14 24_14_1_7 259 +Positive_Control Project1 Analysis1 D2S1338 TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]7 [TTCC]11 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]11 [GGCA]7 21 21_11 21_11_1_7 246 +Positive_Control Project1 Analysis1 D2S1338 TGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]6 [TTCC]12 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]12 [GGCA]6 21 21_12 21_12_1_6 37 +Positive_Control Project1 Analysis1 D2S1338 TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]7 [TTCC]13 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]13 [GGCA]7 23 23_13 23_13_1_7 23 +Positive_Control Project1 Analysis1 D2S1338 
TGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]6 [TTCC]15 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]15 [GGCA]6 24 24_15 24_15_1_6 21 +Positive_Control Project1 Analysis1 D2S1338 TGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]6 [TTCC]13 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]13 [GGCA]6 22 22_13 22_13_1_6 19 +Positive_Control Project1 Analysis1 D2S1338 TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]7 [TTCC]10 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]10 [GGCA]7 20 20_10 20_10_1_7 16 +Positive_Control Project1 Analysis1 D2S1338 TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTCCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGGAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]7 [TTCC]4 TCCC [TTCC]10 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]10 GGGA [GGAA]4 [GGCA]7 25 25_10 25_10_1_7 15 +Positive_Control Project1 Analysis1 D2S1338 TGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]6 [TTCC]16 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]16 [GGCA]6 25 25_16 25_16_1_6 15 +Positive_Control Project1 Analysis1 D2S1338 TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCTTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAAGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA [TGCC]7 [TTCC]5 TTCT [TTCC]6 GTCC [TTCC]2 [GGAA]2 GGAC [GGAA]6 AGAA [GGAA]5 [GGCA]7 22 22_6 22_6_1_7 15 +Positive_Control Project1 Analysis1 D22S1045 ATTATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT 
ATTATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT [ATT]13 ACT [ATT]2 [ATT]13 ACT [ATT]2 16 16_13 16_13 1746 +Positive_Control Project1 Analysis1 D22S1045 ATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT ATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT [ATT]12 ACT [ATT]2 [ATT]12 ACT [ATT]2 15 15_12 15_12 146 +Positive_Control Project1 Analysis1 D22S1045 ATTATTATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT ATTATTATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT [ATT]14 ACT [ATT]2 [ATT]14 ACT [ATT]2 17 17_14 17_14 27 +Positive_Control Project1 Analysis1 D22S1045 ATTATTATTATTATTATTATTATTATTATTATTACTATTATT ATTATTATTATTATTATTATTATTATTATTATTACTATTATT [ATT]11 ACT [ATT]2 [ATT]11 ACT [ATT]2 14 14_11 14_11 13 +Positive_Control Project1 Analysis1 D21S11 TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA [TCTA]4 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]11 [TCTA]4 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]11 29 29_11 29_11_4_6 1144 +Positive_Control Project1 Analysis1 D21S11 TCTATCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA TCTATCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA [TCTA]5 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]11 TA TCTA [TCTA]5 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]11 TA TCTA 31.2 31.2_11 31.2_11_5_6 1064 +Positive_Control Project1 Analysis1 D21S11 TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA [TCTA]4 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]10 
[TCTA]4 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]10 28 28_10 28_10_4_6 84 +Positive_Control Project1 Analysis1 D21S11 TCTATCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA TCTATCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA [TCTA]5 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]10 TA TCTA [TCTA]5 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]10 TA TCTA 30.2 30.2_10 30.2_10_5_6 34 +Positive_Control Project1 Analysis1 D21S11 TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA [TCTA]4 [TCTG]7 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]11 TA TCTA [TCTA]4 [TCTG]7 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]11 TA TCTA 31.2 31.2_11 31.2_11_4_7 15 +Positive_Control Project1 Analysis1 D21S11 TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA [TCTA]4 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]11 TA TCTA [TCTA]4 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]11 TA TCTA 30.2 30.2_11 30.2_11_4_6 14 +Positive_Control Project1 Analysis1 D20S482 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT [AGAT]14 [AGAT]14 14 14_14 14_14 3136 +Positive_Control Project1 Analysis1 D20S482 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT [AGAT]15 [AGAT]15 15 15_15 15_15 2731 +Positive_Control Project1 Analysis1 D20S482 
AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT [AGAT]13 [AGAT]13 13 13_13 13_13 337 +Positive_Control Project1 Analysis1 D20S482 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT [AGAT]16 [AGAT]16 16 16_16 16_16 46 +Positive_Control Project1 Analysis1 D20S482 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT [AGAT]12 [AGAT]12 12 12_12 12_12 13 +Positive_Control Project1 Analysis1 D1S1656 TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG CACACACACACCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA [TAGA]11 TAGG [TGTG]2 TG CA [CACA]2 CCTA [TCTA]11 12 12_11 12_11_1_0 231 +Positive_Control Project1 Analysis1 D1S1656 TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTGTGTG CACACACACATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA [TAGA]13 [TGTG]2 TG CA [CACA]2 [TCTA]13 13 13_13 13_13_0_0 162 +Positive_Control Project1 Analysis1 D1S1656 TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG CACACACACACCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA [TAGA]10 TAGG [TGTG]2 TG CA [CACA]2 CCTA [TCTA]10 11 11_10 11_10_1_0 33 +Positive_Control Project1 Analysis1 D1S1656 TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTGTGTG CACACACACATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA [TAGA]12 [TGTG]2 TG CA [CACA]2 [TCTA]12 12 12_12 12_12_0_0 13 +Positive_Control Project1 Analysis1 D19S433 AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT AAGG AAAG AAGG TAGG [AAGG]11 AGAG AGGA AGAA AGAG AG CT CTCT TTCT TCCT CTCT [CCTT]11 CCTA CCTT CTTT CCTT 13 13_11 13_11_1_0 1782 +Positive_Control Project1 Analysis1 D19S433 AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT 
AAGG AAAG AAGG TAGG [AAGG]12 AGAG AGGA AGAA AGAG AG CT CTCT TTCT TCCT CTCT [CCTT]12 CCTA CCTT CTTT CCTT 14 14_12 14_12_1_0 1621 +Positive_Control Project1 Analysis1 D19S433 AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT AAGG AAAG AAGG TAGG [AAGG]10 AGAG AGGA AGAA AGAG AG CT CTCT TTCT TCCT CTCT [CCTT]10 CCTA CCTT CTTT CCTT 12 12_10 12_10_1_0 122 +Positive_Control Project1 Analysis1 D19S433 AAGGAAAGAAGGTAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTACCTTCTTTCCTT AAGG AAAG AAGG [TAGG]2 [AAGG]11 AGAG AGGA AGAA AGAG AG CT CTCT TTCT TCCT CTCT [CCTT]11 [CCTA]2 CCTT CTTT CCTT 14 14_11 14_11_1_0 35 +Positive_Control Project1 Analysis1 D19S433 AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCTTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT AAGG AAAG AAGG TAGG [AAGG]7 AAAG [AAGG]3 AGAG AGGA AGAA AGAG AG CT CTCT TTCT TCCT CTCT [CCTT]3 CTTT [CCTT]7 CCTA CCTT CTTT CCTT 13 13_7 13_7_1_0 28 +Positive_Control Project1 Analysis1 D19S433 AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCTTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT AAGG AAAG AAGG TAGG [AAGG]8 AAAG [AAGG]3 AGAG AGGA AGAA AGAG AG CT CTCT TTCT TCCT CTCT [CCTT]3 CTTT [CCTT]8 CCTA CCTT CTTT CCTT 14 14_8 14_8_1_0 26 +Positive_Control Project1 Analysis1 D19S433 AAGGAAAGAATGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACATTCTTTCCTT AAGG AAAG AATG TAGG [AAGG]12 AGAG AGGA AGAA AGAG AG CT CTCT TTCT TCCT CTCT [CCTT]12 CCTA CATT CTTT CCTT 14 14_12 14_12_1_0 20 +Positive_Control Project1 Analysis1 D19S433 AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG 
CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCTTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT AAGG AAAG AAGG TAGG [AAGG]6 AAAG [AAGG]5 AGAG AGGA AGAA AGAG AG CT CTCT TTCT TCCT CTCT [CCTT]5 CTTT [CCTT]6 CCTA CCTT CTTT CCTT 14 14_6 14_6_1_0 17 +Positive_Control Project1 Analysis1 D19S433 AAGGAAAGAAGGTAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTACCTTCTTTCCTT AAGG AAAG AAGG [TAGG]2 [AAGG]10 AGAG AGGA AGAA AGAG AG CT CTCT TTCT TCCT CTCT [CCTT]10 [CCTA]2 CCTT CTTT CCTT 13 13_10 13_10_1_0 17 +Positive_Control Project1 Analysis1 D19S433 AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAAGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCTTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT AAGG AAAG AAGG TAGG [AAGG]4 AAAG [AAGG]7 AGAG AGGA AGAA AGAG AG CT CTCT TTCT TCCT CTCT [CCTT]7 CTTT [CCTT]4 CCTA CCTT CTTT CCTT 14 14_7 14_7_1_0 16 +Positive_Control Project1 Analysis1 D19S433 AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAAGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCTTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT AAGG AAAG AAGG TAGG [AAGG]5 AAAG [AAGG]5 AGAG AGGA AGAA AGAG AG CT CTCT TTCT TCCT CTCT [CCTT]5 CTTT [CCTT]5 CCTA CCTT CTTT CCTT 13 13_5 13_5_1_0 16 +Positive_Control Project1 Analysis1 D18S51 AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG [AGAA]16 AAAG AGAG AG [AGAA]16 AAAG AGAG AG 16 16_16 16_16_1 1009 +Positive_Control Project1 Analysis1 D18S51 AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG [AGAA]18 AAAG AGAG AG [AGAA]18 AAAG AGAG AG 18 18_18 18_18_1 999 +Positive_Control Project1 Analysis1 D18S51 AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG 
AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG [AGAA]17 AAAG AGAG AG [AGAA]17 AAAG AGAG AG 17 17_17 17_17_1 165 +Positive_Control Project1 Analysis1 D18S51 AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG [AGAA]15 AAAG AGAG AG [AGAA]15 AAAG AGAG AG 15 15_15 15_15_1 124 +Positive_Control Project1 Analysis1 D17S1301 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT [AGAT]12 [AGAT]12 12 12_12 12_12 599 +Positive_Control Project1 Analysis1 D17S1301 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT [AGAT]11 [AGAT]11 11 11_11 11_11 528 +Positive_Control Project1 Analysis1 D17S1301 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT [AGAT]10 [AGAT]10 10 10_10 10_10 91 +Positive_Control Project1 Analysis1 D16S539 GATAGATAGATAGATAGATAGATAGATAGATAGATA GATAGATAGATAGATAGATAGATAGATAGATAGATA [GATA]9 [GATA]9 9 9_9 9_9_0 1019 +Positive_Control Project1 Analysis1 D16S539 GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA [GATA]13 [GATA]13 13 13_13 13_13_0 996 +Positive_Control Project1 Analysis1 D16S539 GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA [GATA]12 [GATA]12 12 12_12 12_12_0 135 +Positive_Control Project1 Analysis1 D16S539 GATAGATAGATAGATAGATAGATAGATAGATA GATAGATAGATAGATAGATAGATAGATAGATA [GATA]8 [GATA]8 8 8_8 8_8_0 69 +Positive_Control Project1 Analysis1 D16S539 GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA [GATA]11 [GATA]11 11 11_11 11_11_0 13 +Positive_Control Project1 Analysis1 D13S317 TATCTATCTATCTATCTATCTATCTATCTATCTATCAATCAATCATCTATCTATCTTTCTGTCTGTC TATCTATCTATCTATCTATCTATCTATCTATCTATCAATCAATCATCTATCTATCTTTCTGTCTGTC [TATC]9 [AATC]2 [ATCT]3 TTCT GTCT GTC [TATC]9 [AATC]2 [ATCT]3 TTCT 
GTCT GTC 9 9_9 9_9_3_1 958 +Positive_Control Project1 Analysis1 D13S317 TATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCATCTATCTATCTTTCTGTCTGTC TATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCATCTATCTATCTTTCTGTCTGTC [TATC]12 AATC [ATCT]3 TTCT GTCT GTC [TATC]12 AATC [ATCT]3 TTCT GTCT GTC 11 11_12 11_12_3_1 831 +Positive_Control Project1 Analysis1 D13S317 TATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCATCTATCTATCTTTCTGTCTGTC TATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCATCTATCTATCTTTCTGTCTGTC [TATC]11 AATC [ATCT]3 TTCT GTCT GTC [TATC]11 AATC [ATCT]3 TTCT GTCT GTC 10 10_11 10_11_3_1 34 +Positive_Control Project1 Analysis1 D13S317 TATCTATCTATCTATCTATCTATCTATCTATCAATCAATCATCTATCTATCTTTCTGTCTGTC TATCTATCTATCTATCTATCTATCTATCTATCAATCAATCATCTATCTATCTTTCTGTCTGTC [TATC]8 [AATC]2 [ATCT]3 TTCT GTCT GTC [TATC]8 [AATC]2 [ATCT]3 TTCT GTCT GTC 8 8_8 8_8_3_1 17 +Positive_Control Project1 Analysis1 D12S391 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGAT [AGAT]11 [AGAC]6 AGAT [AGAT]11 [AGAC]6 AGAT 18 18_11 18_11_6_0 974 +Positive_Control Project1 Analysis1 D12S391 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGACAGAC AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGACAGAC [AGAT]14 [AGAC]9 [AGAT]14 [AGAC]9 23 23_14 23_14_9_0 751 +Positive_Control Project1 Analysis1 D12S391 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGAT [AGAT]10 [AGAC]6 AGAT [AGAT]10 [AGAC]6 AGAT 17 17_10 17_10_6_0 142 +Positive_Control Project1 Analysis1 D12S391 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGACAGAC AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGACAGAC [AGAT]13 [AGAC]9 [AGAT]13 [AGAC]9 22 22_13 22_13_9_0 137 +Positive_Control Project1 Analysis1 D12S391 
AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGAC AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGAC [AGAT]14 [AGAC]8 [AGAT]14 [AGAC]8 22 22_14 22_14_8_0 23 +Positive_Control Project1 Analysis1 D12S391 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGAC AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGAC [AGAT]15 [AGAC]8 [AGAT]15 [AGAC]8 23 23_15 23_15_8_0 15 +Positive_Control Project1 Analysis1 D12S391 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGAT [AGAT]11 [AGAC]5 AGAT [AGAT]11 [AGAC]5 AGAT 17 17_11 17_11_5_0 13 +Positive_Control Project1 Analysis1 D12S391 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGACAGAC AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGACAGAC [AGAT]12 [AGAC]9 [AGAT]12 [AGAC]9 21 21_12 21_12_9_0 12 +Positive_Control Project1 Analysis1 D12S391 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGAT [AGAT]12 [AGAC]5 AGAT [AGAT]12 [AGAC]5 AGAT 18 18_12 18_12_5_0 11 +Positive_Control Project1 Analysis1 D12S391 AGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGAT AGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGAT [AGAT]9 [AGAC]6 AGAT [AGAT]9 [AGAC]6 AGAT 16 16_9 16_9_6_0 11 +Positive_Control Project1 Analysis1 D10S1248 GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA [GGAA]13 [GGAA]13 13 13_13 13_13 1050 +Positive_Control Project1 Analysis1 D10S1248 GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA [GGAA]15 [GGAA]15 15 15_15 15_15 1022 +Positive_Control Project1 Analysis1 D10S1248 
GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA [GGAA]12 [GGAA]12 12 12_12 12_12 163 +Positive_Control Project1 Analysis1 D10S1248 GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA [GGAA]14 [GGAA]14 14 14_14 14_14 116 +Positive_Control Project1 Analysis1 CSF1PO AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT [AGAT]12 [ATCT]12 12 12_12 12_12_0 702 +Positive_Control Project1 Analysis1 CSF1PO AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT [AGAT]11 [ATCT]11 11 11_11 11_11_0 29 +Positive_Control Project1 Analysis1 CSF1PO AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT [AGAT]13 [ATCT]13 13 13_13 13_13_0 11 diff --git a/lusSTR/tests/data/lusstr_output/Positive_Control_evidence_ngs.csv b/lusSTR/tests/data/lusstr_output/Positive_Control_evidence_ngs.csv new file mode 100644 index 00000000..f5901b88 --- /dev/null +++ b/lusSTR/tests/data/lusstr_output/Positive_Control_evidence_ngs.csv @@ -0,0 +1,84 @@ +Locus,CE Allele,Allele Seq,Reads +CSF1PO,11.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,29 +CSF1PO,12.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,702 +D10S1248,12.0,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,163 +D10S1248,13.0,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,1050 +D10S1248,14.0,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,116 +D10S1248,15.0,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,1022 +D12S391,17.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGAT,142 +D12S391,18.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGAT,974 +D12S391,22.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGACAGAC,137 
+D12S391,23.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGACAGACAGACAGAC,751 +D13S317,9.0,TATCTATCTATCTATCTATCTATCTATCTATCTATCAATCAATCATCTATCTATCTTTCTGTCTGTC,958 +D13S317,10.0,TATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCATCTATCTATCTTTCTGTCTGTC,34 +D13S317,11.0,TATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCATCTATCTATCTTTCTGTCTGTC,831 +D16S539,8.0,GATAGATAGATAGATAGATAGATAGATAGATA,69 +D16S539,9.0,GATAGATAGATAGATAGATAGATAGATAGATAGATA,1019 +D16S539,12.0,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA,135 +D16S539,13.0,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA,996 +D17S1301,10.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,91 +D17S1301,11.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,528 +D17S1301,12.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,599 +D18S51,15.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,124 +D18S51,16.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,1009 +D18S51,17.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,165 +D18S51,18.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,999 +D19S433,12.0,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,122 +D19S433,13.0,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,1782 +D19S433,14.0,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,1621 +D1S1656,11.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,33 +D1S1656,12.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,231 +D1S1656,13.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTGTGTG,162 +D20S482,13.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,337 +D20S482,14.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,3136 +D20S482,15.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,2731 
+D21S11,28.0,TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,84 +D21S11,29.0,TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,1144 +D21S11,31.2,TCTATCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA,1064 +D22S1045,15.0,ATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,146 +D22S1045,16.0,ATTATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,1746 +D2S1338,21.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,246 +D2S1338,22.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,2165 +D2S1338,24.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,259 +D2S1338,25.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,1656 +D2S441,10.0,TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,1781 +D2S441,14.0,TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATTTATCTATCTA,1330 +D3S1358,16.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,135 +D3S1358,17.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,1597 +D3S1358,18.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,1855 +D4S2408,8.0,ATCTATCTATCTATCTATCTATCTATCTATCT,38 +D4S2408,9.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,1357 +D5S818,11.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAG,21 +D5S818,12.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAG,382 +D6S1043,11.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,219 +D6S1043,12.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,2088 +D6S1043,19.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT,138 +D6S1043,20.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT,1487 
+D7S820,8.0,GATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,517 +D7S820,10.0,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,33 +D7S820,11.0,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,522 +D8S1179,13.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,135 +D8S1179,14.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,971 +D8S1179,15.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,661 +D9S1122,11.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,131 +D9S1122,11.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,115 +D9S1122,12.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,1551 +D9S1122,12.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,1427 +FGA,19.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,135 +FGA,20.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1043 +FGA,22.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,182 +FGA,23.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1023 +PentaD,12.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,289 +PentaD,13.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,278 +PentaE,7.0,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,505 +PentaE,14.0,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,261 +TH01,5.0,AATGAATGAATGAATGAATG,247 +TH01,6.0,AATGAATGAATGAATGAATGAATG,4542 +TH01,8.3,AATGAATGAATGAATGAATGATGAATGAATGAATG,151 +TH01,9.3,AATGAATGAATGAATGAATGAATGATGAATGAATGAATG,3581 +TPOX,10.0,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,59 +TPOX,11.0,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,1216 +vWA,15.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,23 +vWA,16.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,377 +vWA,18.0,TCTATCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,43 
+vWA,19.0,TCTATCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,325 diff --git a/lusSTR/tests/data/powerseq_flanking_anno_test.csv b/lusSTR/tests/data/powerseq_flanking_convert_test.csv similarity index 100% rename from lusSTR/tests/data/powerseq_flanking_anno_test.csv rename to lusSTR/tests/data/powerseq_flanking_convert_test.csv diff --git a/lusSTR/tests/data/powerseq_flanking_anno_test_flanks_anno.txt b/lusSTR/tests/data/powerseq_flanking_convert_test_flanks.txt similarity index 100% rename from lusSTR/tests/data/powerseq_flanking_anno_test_flanks_anno.txt rename to lusSTR/tests/data/powerseq_flanking_convert_test_flanks.txt diff --git a/lusSTR/tests/data/powerseq_flanking_anno_test_sexloci.csv b/lusSTR/tests/data/powerseq_flanking_convert_test_sexloci.csv similarity index 100% rename from lusSTR/tests/data/powerseq_flanking_anno_test_sexloci.csv rename to lusSTR/tests/data/powerseq_flanking_convert_test_sexloci.csv diff --git a/lusSTR/tests/data/powerseq_flanking_anno_test_sexloci.txt b/lusSTR/tests/data/powerseq_flanking_convert_test_sexloci.txt similarity index 100% rename from lusSTR/tests/data/powerseq_flanking_anno_test_sexloci.txt rename to lusSTR/tests/data/powerseq_flanking_convert_test_sexloci.txt diff --git a/lusSTR/tests/data/powerseq_flanking_anno_test_sexloci_flanks_anno.txt b/lusSTR/tests/data/powerseq_flanking_convert_test_sexloci_flanks.txt similarity index 100% rename from lusSTR/tests/data/powerseq_flanking_anno_test_sexloci_flanks_anno.txt rename to lusSTR/tests/data/powerseq_flanking_convert_test_sexloci_flanks.txt diff --git a/lusSTR/tests/data/test_FGA_short_seq.csv b/lusSTR/tests/data/test_FGA_short_seq.csv index d3b47c10..d3a22c40 100644 --- a/lusSTR/tests/data/test_FGA_short_seq.csv +++ b/lusSTR/tests/data/test_FGA_short_seq.csv @@ -1 +1,7 @@ -Locus,Total_Reads,Sequence,SampleID,Project,Analysis FGA,547,CCAGCAAAAAAGAAAGAAAGAAA,Sample1,Project1,Analysis1 
FGA,547,CCAGCAAAAAAGAAAGGAAGAAA,Sample1,Project1,Analysis1 FGA,547,CCAGCAAAAAAGAAAGGAAGAAA,Sample1,Project1,Analysis1 FGA,547,CCAGCAAAAAAGAAAGAAAGAAA,Sample1,Project1,Analysis1 FGA,547,CCAGCAAAAAAGAAAGAAAGAAA,Sample1,Project1,Analysis1 \ No newline at end of file +Locus,Total_Reads,Sequence,SampleID,Project,Analysis +FGA,547,CCAGCAAAAAAGAAAGAAAGAAA,Sample1,Project1,Analysis1 +FGA,547,CCAGCAAAAAAGAAAGGAAGAAA,Sample1,Project1,Analysis1 +FGA,547,CCAGCAAAAAAGAAAGGAAGAAA,Sample1,Project1,Analysis1 +FGA,547,CCAGCAAAAAAGAAAGAAAGAAA,Sample1,Project1,Analysis1 +FGA,547,CCAGCAAAAAAGAAAGAAAGAAA,Sample1,Project1,Analysis1 +CSF1PO,100,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTACCTATCTATCTATCTAATCTATCTATCTT,Sample1,Project1,Analysis1 \ No newline at end of file diff --git a/lusSTR/tests/data/testflanks_flanks_anno.txt b/lusSTR/tests/data/testflanks_flanks.txt similarity index 100% rename from lusSTR/tests/data/testflanks_flanks_anno.txt rename to lusSTR/tests/data/testflanks_flanks.txt diff --git a/lusSTR/tests/data/testformat_sr_sexloci_flanks_anno.txt b/lusSTR/tests/data/testformat_sr_sexloci_flanks.txt similarity index 100% rename from lusSTR/tests/data/testformat_sr_sexloci_flanks_anno.txt rename to lusSTR/tests/data/testformat_sr_sexloci_flanks.txt diff --git a/lusSTR/tests/test_filters.py b/lusSTR/tests/test_filters.py index c33e2cff..fbc61f9c 100644 --- a/lusSTR/tests/test_filters.py +++ b/lusSTR/tests/test_filters.py @@ -13,9 +13,11 @@ import filecmp import json import lusSTR -from lusSTR.filter_settings import get_filter_metadata_file +from lusSTR.scripts.filter_settings import get_filter_metadata_file from lusSTR.tests import data_file +import os import pytest +import shutil from tempfile import NamedTemporaryFile @@ -34,7 +36,7 @@ ) def test_thresholds(filter, locus, total_reads, allele_reads, final_reads, pass_filt): metadata = filter_marker_data[locus] - test_total_reads, test_passfilt = lusSTR.filter_settings.thresholds( + test_total_reads, test_passfilt = 
lusSTR.scripts.filter_settings.thresholds( filter, metadata, total_reads, allele_reads ) assert test_total_reads == final_reads @@ -45,7 +47,9 @@ def test_thresholds(filter, locus, total_reads, allele_reads, final_reads, pass_ "perc, perc_stut, reads, forward_threshold", [(0, 0.18, 100, 4), (0.15, 0.21, 100, 15)] ) def test_forward_stutter_threshold(perc, perc_stut, reads, forward_threshold): - test_forward_thresh = lusSTR.filter_settings.forward_stut_thresh(perc, perc_stut, reads) + test_forward_thresh = lusSTR.scripts.filter_settings.forward_stut_thresh( + perc, perc_stut, reads + ) assert test_forward_thresh == forward_threshold @@ -73,7 +77,7 @@ def test_minus1stutter( called_allele_type, stut_perc, ): - test_stutter_type, test_stut_perc = lusSTR.filter_settings.minus1_stutter( + test_stutter_type, test_stut_perc = lusSTR.scripts.filter_settings.minus1_stutter( all_type, stutter_thresh, forward_thresh, @@ -110,7 +114,7 @@ def test_minus2stutter( called_allele_type, stut_perc, ): - test_stutter_type, test_stut_perc = lusSTR.filter_settings.minus2_stutter( + test_stutter_type, test_stut_perc = lusSTR.scripts.filter_settings.minus2_stutter( all_type, stutter_thresh, forward_thresh, @@ -146,7 +150,7 @@ def test_plus1stutter( called_allele_type, stut_perc, ): - test_stutter_type, test_stut_perc = lusSTR.filter_settings.plus1_stutter( + test_stutter_type, test_stut_perc = lusSTR.scripts.filter_settings.plus1_stutter( all_type, stutter_thresh, forward_thresh, ref_reads, al1_ref_reads, al_reads ) assert test_stutter_type == called_allele_type @@ -154,106 +158,85 @@ def test_plus1stutter( def test_EFMoutput_format(tmp_path): - input_file = data_file("test_stutter.txt") + str_path = str(tmp_path / "WD") + inputfile = data_file("test_stutter.txt") exp_out = data_file("RU_stutter_test/test_filtering_EFMoutput.csv") exp_info_out = data_file("RU_stutter_test/test_filtering_EFMoutput_sequence_info.csv") - obs_out = str(tmp_path / "test_output.csv") - obs_info_out = 
str(tmp_path / "test_output_sequence_info.csv") - arglist = ["filter", "-o", obs_out, "--output-type", "efm", "--info", input_file] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.filter.main(args) + obs_out = str(tmp_path / "WD/test_output/test_output_evidence_ce.csv") + obs_info_out = str(tmp_path / "WD/test_output/test_output_sequence_info.csv") + arglist = ["config", "-w", str_path, "-o", "test_output", "--efm", "--ce", "--input", "WD"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile, os.path.join(str_path, "test_output.csv")) + shutil.copyfile(inputfile, os.path.join(str_path, "test_output.txt")) + all_arglist = ["strs", "all", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(all_arglist)) assert filecmp.cmp(exp_out, obs_out) is True - assert filecmp.cmp(exp_info_out, obs_info_out) is True @pytest.mark.parametrize( "outputdir, datatype", [("RU_stutter_test/", "ce"), ("NGS_stutter_test/", "ngs")] ) def test_STRmixoutput_format(outputdir, datatype, tmp_path): - input_file = data_file("test_stutter.txt") + str_path = str(tmp_path / "WD") + inputfile = data_file("test_stutter.txt") exp_out = data_file(f"{outputdir}Sample1_{datatype}.csv") exp_info_out = data_file(f"{outputdir}STRmix_Files_sequence_info.csv") - obs_outdir = str(tmp_path / "STRmix_Files") - obs_out = str(tmp_path / f"STRmix_Files/Sample1_{datatype}.csv") - obs_info_out = str(tmp_path / f"STRmix_Files/sequence_info.csv") - arglist = [ - "filter", - "-o", - obs_outdir, - "--output-type", - "strmix", - "--info", - "--data-type", - datatype, - input_file, - ] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.filter.main(args) + obs_out = str(tmp_path / f"WD/STRmix_Files/Sample1_evidence_{datatype}.csv") + obs_info_out = str(tmp_path / f"WD/STRmix_Files/STRmix_Files_sequence_info.csv") + if datatype == "ngs": + arglist = ["config", "-w", str_path, "--input", "WD", "-o", "STRmix_Files"] + else: + arglist = ["config", 
"-w", str_path, "--input", "WD", "-o", "STRmix_Files", "--ce"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile, os.path.join(str_path, "STRmix_Files.csv")) + shutil.copyfile(inputfile, os.path.join(str_path, "STRmix_Files.txt")) + all_arglist = ["strs", "all", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(all_arglist)) assert filecmp.cmp(exp_out, obs_out) is True assert filecmp.cmp(exp_info_out, obs_info_out) is True -def test_stdout(capsys): - input_file = data_file("test_stutter.txt") - output = data_file("RU_stutter_test/test_filtering_EFMoutput.csv") - arglist = ["filter", "--output-type", "efm", input_file] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.filter.main(args) - with open(output, "r") as fh: - exp_out = fh.read().strip() - terminal = capsys.readouterr() - obs_out = terminal.out.strip() - assert obs_out == exp_out - - def test_nofilters(tmp_path): - input_file = data_file("test_stutter.txt") + str_path = str(tmp_path / "WD") + inputfile = data_file("test_stutter.txt") exp_out = data_file("NGS_stutter_test/Sample1_nofilter.csv") - obs_out = str(tmp_path / "Sample1_ngs.csv") - arglist = [ - "filter", - "-o", - str(tmp_path), - "--output-type", - "strmix", - "--no-filters", - "--data-type", - "ngs", - input_file, - ] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.filter.main(args) + obs_out = str(tmp_path / "WD/lusstr_output/Sample1_evidence_ngs.csv") + arglist = ["config", "-w", str_path, "--input", "WD", "--nofilter"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile, os.path.join(str_path, "lusstr_output.csv")) + shutil.copyfile(inputfile, os.path.join(str_path, "lusstr_output.txt")) + all_arglist = ["strs", "all", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(all_arglist)) assert filecmp.cmp(exp_out, obs_out) is True def test_flags(tmp_path): - input_file = data_file("test_stutter.txt") + 
str_path = str(tmp_path / "WD") + inputfile = data_file("test_stutter.txt") exp_out = data_file("RU_stutter_test/Flagged_Loci.csv") - obs_outdir = str(tmp_path / "RU_stutter_test") - obs_out = str(tmp_path / "RU_stutter_test/Flagged_Loci.csv") - arglist = ["filter", "-o", obs_outdir, "--output-type", "strmix", "--info", input_file] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.filter.main(args) + obs_out = str(tmp_path / "WD/lusstr_output/lusstr_output_Flagged_Loci.csv") + arglist = ["config", "-w", str_path, "--input", "WD"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile, os.path.join(str_path, "lusstr_output.csv")) + shutil.copyfile(inputfile, os.path.join(str_path, "lusstr_output.txt")) + all_arglist = ["strs", "all", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(all_arglist)) assert filecmp.cmp(exp_out, obs_out) is True def test_efm_reference(tmp_path): - input_file = data_file("test_references.txt") + str_path = str(tmp_path / "WD") + inputfile = data_file("test_references.txt") exp_out = data_file("RU_stutter_test/EFM_test_reference.csv") - obs_out = str(tmp_path / "test_output.csv") - obs_efm_out = str(tmp_path / "test_output_reference.csv") - arglist = [ - "filter", - "-o", - obs_out, - "--output-type", - "efm", - "--profile-type", - "reference", - input_file, - ] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.filter.main(args) + obs_efm_out = str(tmp_path / "WD/lusstr_output/lusstr_output_reference_ce.csv") + arglist = ["config", "-w", str_path, "--input", "WD", "--efm", "--reference", "--ce"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile, os.path.join(str_path, "lusstr_output.csv")) + shutil.copyfile(inputfile, os.path.join(str_path, "lusstr_output.txt")) + print(os.listdir(str_path)) + all_arglist = ["strs", "all", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(all_arglist)) + 
print(os.listdir(f"{str_path}/lusstr_output")) assert filecmp.cmp(exp_out, obs_efm_out) is True @@ -261,33 +244,45 @@ def test_efm_reference(tmp_path): "outputdir, datatype", [("RU_stutter_test/", "ce"), ("NGS_stutter_test/", "ngs")] ) def test_strmix_reference(outputdir, datatype, tmp_path): - input_file = data_file("test_references.txt") + str_path = str(tmp_path / "WD") + inputfile = data_file("test_references.txt") exp_out = data_file(f"{outputdir}Positive_Control_reference_{datatype}.csv") - obs_out = str(tmp_path / f"Positive_Control_reference_{datatype}.csv") - arglist = [ - "filter", - "-o", - str(tmp_path), - "--output-type", - "strmix", - "--profile-type", - "reference", - "--data-type", - datatype, - input_file, - ] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.filter.main(args) + obs_out = str(tmp_path / f"WD/STRmix_Files/Positive_Control_reference_{datatype}.csv") + if datatype == "ngs": + arglist = ["config", "-w", str_path, "--input", "WD", "-o", "STRmix_Files", "--reference"] + else: + arglist = [ + "config", + "-w", + str_path, + "--input", + "WD", + "-o", + "STRmix_Files", + "--ce", + "--reference", + ] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile, os.path.join(str_path, "STRmix_Files.csv")) + shutil.copyfile(inputfile, os.path.join(str_path, "STRmix_Files.txt")) + all_arglist = ["strs", "all", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(all_arglist)) assert filecmp.cmp(exp_out, obs_out) is True def test_D7(tmp_path): - input_file = data_file("test_D7.txt") + str_path = str(tmp_path / "WD") + inputfile = data_file("test_D7.txt") exp_out = data_file("D7_microvariant_flagged.csv") - obs_out = str(tmp_path / "Flagged_Loci.csv") - arglist = ["filter", "-o", str(tmp_path), "--output-type", "strmix", "--info", input_file] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.filter.main(args) + obs_out = str(tmp_path / "WD/test/test_Flagged_Loci.csv") + 
arglist = ["config", "-w", str_path, "--input", "WD", "-o", "test"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile, os.path.join(str_path, "test.csv")) + shutil.copyfile(inputfile, os.path.join(str_path, "test.txt")) + all_arglist = ["strs", "all", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(all_arglist)) + assert filecmp.cmp(exp_out, obs_out) is True + assert filecmp.cmp(exp_out, obs_out) @@ -303,5 +298,7 @@ def test_D7(tmp_path): ], ) def test_ngs_stutter(ref_bracket, quest_bracket, stutter, actual_call): - test_stut = lusSTR.filter_settings.bracketed_stutter_id(ref_bracket, quest_bracket, stutter) + test_stut = lusSTR.scripts.filter_settings.bracketed_stutter_id( + ref_bracket, quest_bracket, stutter + ) assert test_stut == actual_call diff --git a/lusSTR/tests/test_format.py b/lusSTR/tests/test_format.py index 21815c62..6a761773 100644 --- a/lusSTR/tests/test_format.py +++ b/lusSTR/tests/test_format.py @@ -19,92 +19,71 @@ from tempfile import NamedTemporaryFile -def test_format(): +def test_format(tmp_path): UAStestfile = data_file("snps/Positive Control Sample Details Report 2315.xlsx") - formatoutput = data_file("testformat.csv") - with NamedTemporaryFile(suffix=".csv") as outfile: - arglist = ["format", UAStestfile, "-o", outfile.name, "--uas"] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.format.main(args) - assert filecmp.cmp(formatoutput, outfile.name) is True - - -def test_format_stdout(capsys): - UAStestfile = data_file("snps/Positive Control Sample Details Report 2315.xlsx") - formatoutput = data_file("testformat.csv") - arglist = ["format", UAStestfile, "--uas"] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.format.main(args) - with open(formatoutput, "r") as fh: - exp_out = fh.read().strip() - terminal = capsys.readouterr() - obs_out = terminal.out.strip() - assert obs_out == exp_out + exp_output = data_file("testformat.csv") + obs_output = 
str(tmp_path / "lusstr_output.csv") + str_path = str(tmp_path) + config_arglist = ["config", "--input", str(UAStestfile), "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(config_arglist)) + format_arglist = ["strs", "format", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(format_arglist)) + assert filecmp.cmp(exp_output, obs_output) is True @pytest.mark.parametrize( "input, testoutput", [ - ("STRait_Razor_test_output", "STRait_Razor_test_output.csv"), + ("STRait_Razor_test_output/", "STRait_Razor_test_output.csv"), ("STRait_Razor_test_output/A001.txt", "STRaitRazor_output_test_A001.csv"), ], ) -def test_format_straitrazor(input, testoutput): - with NamedTemporaryFile() as outfile: - inputdb = data_file(input) - testformat = data_file(testoutput) - arglist = ["format", inputdb, "-o", outfile.name] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.format.main(args) - assert filecmp.cmp(testformat, outfile.name) is True +def test_format_straitrazor(input, testoutput, tmp_path): + input_file = data_file(input) + exp_output = data_file(testoutput) + obs_output = str(tmp_path / "lusstr_output.csv") + str_path = str(tmp_path) + config_arglist = ["config", "--input", str(input_file), "-w", str_path, "--straitrazor"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(config_arglist)) + format_arglist = ["strs", "format", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(format_arglist)) + assert filecmp.cmp(exp_output, obs_output) is True -def test_format_sexloci_uas(): +def test_format_sexloci_uas(tmp_path): UAStestfile = data_file("snps/Positive Control Sample Details Report 2315.xlsx") - formatoutput = data_file("testformat_uas_sexloci.csv") - with NamedTemporaryFile(suffix=".csv") as outfile: - arglist = ["format", UAStestfile, "-o", outfile.name, "--uas", "--include-sex"] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.format.main(args) - outfile_name = 
os.path.splitext(outfile.name)[0] - outfile_name_output = f"{outfile_name}_sexloci.csv" - assert filecmp.cmp(formatoutput, outfile_name_output) is True + exp_output = data_file("testformat_uas_sexloci.csv") + obs_output = str(tmp_path / "lusstr_output_sexloci.csv") + str_path = str(tmp_path) + config_arglist = ["config", "--input", str(UAStestfile), "-w", str_path, "--sex"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(config_arglist)) + format_arglist = ["strs", "format", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(format_arglist)) + assert filecmp.cmp(exp_output, obs_output) is True def test_format_sex_loci_straitrazor(tmp_path): inputdb = data_file("STRait_Razor_test_output") - exp_out = data_file("testformat_sr_sexloci.csv") - obs_out = str(tmp_path / "sr.csv") - obs_out_sex = str(tmp_path / "sr_sexloci.csv") - arglist = ["format", inputdb, "-o", obs_out, "--include-sex"] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.format.main(args) - assert filecmp.cmp(exp_out, obs_out_sex) is True - - -def test_uas_directory_autosomal_only(tmp_path): - inputdb = data_file("UAS_bulk_input") - copydb = str(tmp_path / "UAS_bulk_input") - copytree(inputdb, copydb) - bogusfile = os.path.join(copydb, "bogusfile.txt") - with open(bogusfile, "w") as fh: - pass - exp_out_auto = data_file("UAS_bulk_test.csv") - obs_out_auto = str(tmp_path / "format_output.csv") - arglist = ["format", "-o", obs_out_auto, "--uas", copydb] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.format.main(args) - assert filecmp.cmp(exp_out_auto, obs_out_auto) is True + exp_output = data_file("testformat_sr_sexloci.csv") + obs_output = str(tmp_path / "lusstr_output_sexloci.csv") + str_path = str(tmp_path) + config_arglist = ["config", "--input", str(inputdb), "-w", str_path, "--straitrazor", "--sex"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(config_arglist)) + format_arglist = ["strs", "format", "-w", str_path] + 
lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(format_arglist)) + assert filecmp.cmp(exp_output, obs_output) is True def test_uas_directory_with_xy(tmp_path): inputdb = data_file("UAS_bulk_input") - exp_out_auto = data_file("UAS_bulk_test.csv") - exp_out_sex = data_file("UAS_bulk_test_sexloci.csv") - obs_out_auto = str(tmp_path / "format_output.csv") - obs_out_sex = str(tmp_path / "format_output_sexloci.csv") - arglist = ["format", "-o", obs_out_auto, "--uas", "--include-sex", inputdb] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.format.main(args) - assert filecmp.cmp(exp_out_auto, obs_out_auto) is True - assert filecmp.cmp(exp_out_sex, obs_out_sex) is True + exp_output_sex = data_file("UAS_bulk_test_sexloci.csv") + obs_sex_output = str(tmp_path / "lusstr_output_sexloci.csv") + exp_output = data_file("UAS_bulk_test.csv") + obs_output = str(tmp_path / "lusstr_output.csv") + str_path = str(tmp_path) + config_arglist = ["config", "--input", str(inputdb), "-w", str_path, "--sex"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(config_arglist)) + format_arglist = ["strs", "format", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(format_arglist)) + assert filecmp.cmp(exp_output_sex, obs_sex_output) is True + assert filecmp.cmp(exp_output, obs_output) is True diff --git a/lusSTR/tests/test_marker.py b/lusSTR/tests/test_marker.py index b581a9cb..05483a4b 100644 --- a/lusSTR/tests/test_marker.py +++ b/lusSTR/tests/test_marker.py @@ -12,7 +12,7 @@ import pytest import lusSTR -from lusSTR.marker import STRMarkerObject +from lusSTR.scripts.marker import STRMarkerObject @pytest.mark.parametrize( @@ -47,21 +47,21 @@ ) def test_D21_bracket(sequence, bracket_form): marker = STRMarkerObject("D21S11", sequence, uas=True) - assert marker.annotation == bracket_form + assert marker.convert == bracket_form -def test_D19_annotation(): +def test_D19_convert(): uas_sequence = ( 
"AAGGAAAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAAGAAGAAAGAGAG" ) marker = STRMarkerObject("D19S433", uas_sequence, uas=True) - assert marker.annotation == "CT CTCT TTCT TCTT CTCT [CCTT]14 CCTA CCTT TT CCTT" + assert marker.convert == "CT CTCT TTCT TCTT CTCT [CCTT]14 CCTA CCTT TT CCTT" -def test_D1_annotation(): +def test_D1_convert(): uas_sequence = "TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTATGTG" marker = STRMarkerObject("D1S1656", uas_sequence, uas=True) - assert marker.annotation == "CA CATA CACA [TCTA]11" + assert marker.convert == "CA CATA CACA [TCTA]11" @pytest.mark.parametrize( @@ -79,26 +79,24 @@ def test_D1_annotation(): ("AAAAGAAAAAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA", "AAAAG AAAAA [AAAGA]8"), ], ) -def test_PentaD_annotation(sequence, bracket_form): +def test_PentaD_convert(sequence, bracket_form): marker = STRMarkerObject("PENTA D", sequence, uas=True) - assert marker.annotation == bracket_form + assert marker.convert == bracket_form -def test_FGA_anno(): +def test_FGA_convert(): uas_sequence = ( "TTTCTTTCTTTCTTTCTTTTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTGTCTGTCTGTCTTTCTTTCTTTCTTTCTT" "TCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTCCTTCCTTCCTTTCTTTCTTTCTCCTTCCTTCCTTCCTTCC" ) - annotation = "[GGAA]4 GGAG [AAAG]3 [GAAG]3 [AAAG]15 [ACAG]3 [AAAG]9 AA AAAA [GAAA]4" + convert = "[GGAA]4 GGAG [AAAG]3 [GAAG]3 [AAAG]15 [ACAG]3 [AAAG]9 AA AAAA [GAAA]4" marker = STRMarkerObject("FGA", uas_sequence, uas=True) - print(annotation) - print(marker.annotation) - assert marker.annotation == annotation + assert marker.convert == convert def test_THO1(): marker = STRMarkerObject("TH01", "AATGAATGAATGAATGAATGATGATGAATGAATGAATG", uas=True) - assert marker.annotation == "[AATG]5 ATG ATG [AATG]3" + assert marker.convert == "[AATG]5 ATG ATG [AATG]3" @pytest.mark.parametrize( @@ -134,7 +132,7 @@ def test_THO1(): ), ], ) -def test_D21_anno(sequence, lus_allele, sec_allele, tert_allele): +def test_D21_convert(sequence, lus_allele, 
sec_allele, tert_allele): marker = STRMarkerObject("D21S11", sequence, uas=True) lus, sec, tert = marker.designation assert str(lus) == lus_allele @@ -210,9 +208,9 @@ def test_D21_lus_sec(): ), ], ) -def test_annotation_and_lus(locus, sequence, forward_bracket, lus, sec, tert): +def test_convert_and_lus(locus, sequence, forward_bracket, lus, sec, tert): marker = STRMarkerObject(locus, sequence, uas=True) - assert marker.annotation == forward_bracket + assert marker.convert == forward_bracket lus_out, sec_out, tert_out = marker.designation assert str(lus_out) == lus assert str(sec_out) == sec @@ -237,7 +235,7 @@ def test_annotation_and_lus(locus, sequence, forward_bracket, lus, sec, tert): ) def test_strobj_DYS389II(sequence, bracketed, conc, lus, sec, tert): marker = STRMarkerObject("DYS389II", sequence, uas=False, kit="forenseq") - assert marker.annotation == bracketed + assert marker.convert == bracketed assert marker.canonical == conc assert marker.designation == (lus, sec, tert) @@ -251,8 +249,8 @@ def test_strobj_CSF1PO(): ) assert marker.uas_sequence == "AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT" assert marker.forward_sequence == "ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT" - assert marker.annotation == "[ATCT]12" - assert marker.annotation_uas == "[AGAT]12" + assert marker.convert == "[ATCT]12" + assert marker.convert_uas == "[AGAT]12" assert marker.canonical == 12, " " assert marker.designation == ("12", "0", None) assert marker.flank_5p == "CT TCCT" @@ -268,8 +266,8 @@ def test_strobj_D10S1248(): ) assert marker.uas_sequence == "GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA" assert marker.forward_sequence == "GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA" - assert marker.annotation == "[GGAA]13" - assert marker.annotation_uas == "[GGAA]13" + assert marker.convert == "[GGAA]13" + assert marker.convert_uas == "[GGAA]13" assert marker.canonical == 13, " " assert marker.designation == ("13", None, None) @@ -284,8 +282,8 @@ def 
test_strobj_D1S1656(): ) assert marker.uas_sequence == "TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG" assert marker.forward_sequence == "CACACACACACCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA" - assert marker.annotation == "CA [CACA]2 CCTA [TCTA]11" - assert marker.annotation_uas == "[TAGA]11 TAGG [TGTG]2 TG" + assert marker.convert == "CA [CACA]2 CCTA [TCTA]11" + assert marker.convert_uas == "[TAGA]11 TAGG [TGTG]2 TG" assert marker.canonical == 12, " " assert marker.designation == ("11", "1", "0") @@ -299,8 +297,8 @@ def test_strobj_D5S818(): ) assert marker.uas_sequence == "AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAG" assert marker.forward_sequence == "CTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT" - assert marker.annotation == "CTCT [ATCT]12" - assert marker.annotation_uas == "[AGAT]12 AGAG" + assert marker.convert == "CTCT [ATCT]12" + assert marker.convert_uas == "[AGAT]12 AGAG" assert marker.canonical == 12, " " assert marker.designation == ("12", None, None) @@ -317,7 +315,7 @@ def test_strobj_D16S539(): assert marker.forward_sequence == "GATAGATAGATAGATTGATTGATAGATAGATAGATAGATA" assert marker.flank_5p == "TC CTCT T CCCT AGAT CAAT [ACAG]4 GTG" assert marker.flank_3p == "TCAT TGAA AGAC AAA A CAGA [GATG]2 ATA GA T AC" - assert marker.annotation == "[GATA]3 [GATT]2 [GATA]5" + assert marker.convert == "[GATA]3 [GATT]2 [GATA]5" def test_strobj_D7S820(): @@ -332,7 +330,7 @@ def test_strobj_D7S820(): assert marker.forward_sequence == "AAAACTATCAATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATC" assert marker.flank_5p == "T ATTT AGTG AGAT AAAAAA" assert marker.flank_3p == "GTTA [GTTC]2 TAAA CTAT" - assert marker.annotation == "A AAAC TATC AATC TGTC [TATC]10" + assert marker.convert == "A AAAC TATC AATC TGTC [TATC]10" def test_strobj_D3S1358(): @@ -340,7 +338,7 @@ def test_strobj_D3S1358(): marker = STRMarkerObject("D3S1358", sequence, uas=True, kit="forenseq") assert marker.forward_sequence == sequence assert marker.uas_sequence == 
sequence - assert marker.annotation == "TCTA [TCTG]2 [TCTA]9 [ACTA]2 [TCTA]2" + assert marker.convert == "TCTA [TCTG]2 [TCTA]9 [ACTA]2 [TCTA]2" def test_strobj_D19S433_newformat(): @@ -351,7 +349,7 @@ def test_strobj_D19S433_newformat(): uas=False, kit="forenseq", ) - assert marker.annotation == "CT CTCT TTCT TCCT CTCT [CCTT]12 CCTA [CCTT]3" + assert marker.convert == "CT CTCT TTCT TCCT CTCT [CCTT]12 CCTA [CCTT]3" def test_strobj_D21S11_newformat(): @@ -363,16 +361,14 @@ def test_strobj_D21S11_newformat(): uas=False, kit="forenseq", ) - assert marker.annotation == ( - "[TCTA]5 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]11 TA" - ) + assert marker.convert == ("[TCTA]5 [TCTG]6 [TCTA]3 TA [TCTA]3 TCA [TCTA]2 TCCA TA [TCTA]11 TA") def test_strobj_FGA_newformat(): marker = STRMarkerObject( "FGA", "CCAGCAAAAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA", uas=False, kit="forenseq" ) - assert marker.annotation == "AAAA [AGAA]3 A" + assert marker.convert == "AAAA [AGAA]3 A" def test_strobj_DYS643_foren(): @@ -383,7 +379,7 @@ def test_strobj_DYS643_foren(): uas=False, kit="forenseq", ) - assert marker.annotation == "[CTTTT]10 CTTTC TTTT" + assert marker.convert == "[CTTTT]10 CTTTC TTTT" assert str(marker.canonical) == "10" assert marker.designation == ("10", None, None) assert marker.flank_3p == "TAAAA CTT" @@ -397,7 +393,7 @@ def test_strobj_DYS635_foren(): uas=False, kit="forenseq", ) - assert marker.annotation == "[TAGA]12 [TACA]2 [TAGA]2 [TACA]2 [TAGA]4" + assert marker.convert == "[TAGA]12 [TACA]2 [TAGA]2 [TACA]2 [TAGA]4" assert str(marker.canonical) == "22" assert marker.designation == ("12", None, None) assert marker.flank_5p == "A [TCAA]2 TGAA TGGA TAAA GAAA ATGT GA" @@ -411,7 +407,7 @@ def test_strobj_DYS612(): uas=False, kit="forenseq", ) - assert marker.annotation == "[CCT]5 CTT [TCT]4 CCT [TCT]24" + assert marker.convert == "[CCT]5 CTT [TCT]4 CCT [TCT]24" assert marker.canonical == 29 assert marker.designation == (24, 4, None) assert marker.flank_3p == "G TCA CTT TTC 
CAA [ATT]2 TTC TTT T" @@ -425,7 +421,7 @@ def test_strobj_DYS576_foren(): uas=False, kit="forenseq", ) - assert marker.annotation == "[AAAG]17" + assert marker.convert == "[AAAG]17" assert str(marker.canonical) == "17" assert marker.designation == ("17", None, None) assert marker.flank_3p == "AAAA AGCC AAGA CAAA TACG CTTA TTAC TCCC ATCT CCT" @@ -439,7 +435,7 @@ def test_strobj_DYS549_foren(): uas=False, kit="forenseq", ) - assert marker.annotation == "[GATA]13" + assert marker.convert == "[GATA]13" assert str(marker.canonical) == "13" assert marker.designation == ("13", None, None) assert marker.flank_5p == ( @@ -455,7 +451,7 @@ def test_strobj_DYS533(): uas=False, kit="forenseq", ) - assert marker.annotation == "[TATC]13" + assert marker.convert == "[TATC]13" assert str(marker.canonical) == "13" assert marker.designation == ("13", None, None) assert marker.flank_3p == "ATCT ATCA TCTT CTAT TGTT T" @@ -470,7 +466,7 @@ def test_strobj_DYS522(): uas=False, kit="forenseq", ) - assert marker.annotation == "ATA GATG [ATAG]12" + assert marker.convert == "ATA GATG [ATAG]12" assert marker.canonical == 12 assert marker.designation == ("12", None, None) @@ -501,7 +497,7 @@ def test_strobj_DYS522(): ) def test_strobj_DYS439(sequence, bracketed, conc, lus, sec, tert, kit): marker = STRMarkerObject("DYS439", sequence, uas=False, kit=kit) - assert marker.annotation == bracketed + assert marker.convert == bracketed assert str(marker.canonical) == conc assert marker.designation == (lus, sec, tert) @@ -515,7 +511,7 @@ def test_strobj_DYS437_foren(): uas=False, kit="forenseq", ) - assert marker.annotation == "[TCTA]9 [TCTG]3 [TCTA]4" + assert marker.convert == "[TCTA]9 [TCTG]3 [TCTA]4" assert str(marker.canonical) == "16" assert marker.designation == ("9", None, None) assert marker.flank_3p == ( @@ -533,7 +529,7 @@ def test_strobj_DYS392_foren(): uas=False, kit="forenseq", ) - assert marker.annotation == "[ATA]13" + assert marker.convert == "[ATA]13" assert str(marker.canonical) == 
"13" assert marker.designation == ("13", None, None) assert marker.flank_3p == ( @@ -550,7 +546,7 @@ def test_strobj_DYS391_foren(): uas=False, kit="forenseq", ) - assert marker.annotation == "[TCTG]3 [TCTA]13 TCTG" + assert marker.convert == "[TCTG]3 [TCTA]13 TCTG" assert str(marker.canonical) == "13" assert marker.designation == ("13", None, None) assert marker.flank_3p == "CCTA TCT [GCCT]2 ACCT ATCC CTCT AT" @@ -565,7 +561,7 @@ def test_strobj_DYS19_foren(): uas=False, kit="forenseq", ) - assert marker.annotation == "[TCTA]11 CCTA [TCTA]3" + assert marker.convert == "[TCTA]11 CCTA [TCTA]3" assert str(marker.canonical) == "14" assert marker.designation == ("11", None, None) assert marker.flank_3p == "" @@ -579,7 +575,7 @@ def test_strobj_HPRTB(): uas=False, kit="forenseq", ) - assert marker.annotation == "[ATCT]12" + assert marker.convert == "[ATCT]12" assert marker.canonical == 12 assert marker.designation == ("12", None, None) assert marker.flank_5p == ( @@ -597,7 +593,7 @@ def test_strobj_DXS8378(): uas=False, kit="forenseq", ) - assert marker.annotation == "[ATAG]11" + assert marker.convert == "[ATAG]11" assert marker.canonical == 11 assert marker.designation == ("11", None, None) assert marker.flank_5p == ( @@ -618,7 +614,7 @@ def test_strobj_DXS7132(): uas=False, kit="forenseq", ) - assert marker.annotation == "[TAGA]13" + assert marker.convert == "[TAGA]13" assert marker.canonical == 13 assert marker.designation == ("13", None, None) assert marker.flank_3p == ( @@ -654,7 +650,7 @@ def test_strobj_DXS7132(): ) def test_strobj_DXS10135(sequence, bracketed, conc, lus, sec, tert): marker = STRMarkerObject("DXS10135", sequence, uas=False, kit="forenseq") - assert marker.annotation == bracketed + assert marker.convert == bracketed assert str(marker.canonical) == conc assert marker.designation == (lus, sec, tert) @@ -692,7 +688,7 @@ def test_strobj_DXS10135(sequence, bracketed, conc, lus, sec, tert): ) def test_strobj_DXS10074(sequence, bracketed, conc, lus, sec, 
tert, flank_5p, flank_3p): marker = STRMarkerObject("DXS10074", sequence, uas=False, kit="forenseq") - assert marker.annotation == bracketed + assert marker.convert == bracketed assert str(marker.canonical) == conc assert marker.designation == (lus, sec, tert) assert marker.flank_5p == flank_5p @@ -707,7 +703,7 @@ def test_strobj_Y_GATA_H4(): uas=False, kit="forenseq", ) - assert marker.annotation == ( + assert marker.convert == ( "C [TATC]3 TATT [CATC]2 TAAT CTAT CCAT [TCTA]11 [CCTA]3 [TCTA]2 TAGA [TCTA]3 TCT" ) assert str(marker.canonical) == "11" @@ -745,7 +741,7 @@ def test_strobj_Y_GATA_H4(): ) def test_strobj_DYS390(sequence, bracketed, conc, lus, sec, tert, flank_5p, kit): marker = STRMarkerObject("DYS390", sequence, uas=False, kit=kit) - assert marker.annotation == bracketed + assert marker.convert == bracketed assert str(marker.canonical) == conc assert marker.designation == (lus, sec, tert) assert marker.flank_5p == flank_5p @@ -783,7 +779,7 @@ def test_strobj_DYS390(sequence, bracketed, conc, lus, sec, tert, flank_5p, kit) ) def test_strobj_DYS385(sequence, bracketed, conc, lus, sec, tert, kit): marker = STRMarkerObject("DYS385A-B", sequence, uas=False, kit=kit) - assert marker.annotation == bracketed + assert marker.convert == bracketed assert str(marker.canonical) == conc assert marker.designation == (lus, sec, tert) @@ -815,7 +811,7 @@ def test_strobj_DYS385(sequence, bracketed, conc, lus, sec, tert, kit): ) def test_strobj_DYS448(sequence, bracketed, conc, lus, sec, tert): marker = STRMarkerObject("DYS448", sequence, uas=False, kit="forenseq") - assert marker.annotation == bracketed + assert marker.convert == bracketed assert marker.canonical == conc assert marker.designation == (lus, sec, tert) @@ -845,7 +841,7 @@ def test_strobj_DYS448(sequence, bracketed, conc, lus, sec, tert): ) def test_strobj_DXS10103(sequence, bracketed, conc, lus, sec, tert): marker = STRMarkerObject("DXS10103", sequence, uas=False, kit="forenseq") - assert marker.annotation 
== bracketed + assert marker.convert == bracketed assert marker.canonical == conc assert marker.designation == (lus, sec, tert) @@ -868,7 +864,7 @@ def test_strobj_DXS10103(sequence, bracketed, conc, lus, sec, tert): ) def test_strobj_DYS389II(sequence, bracketed, conc, lus, sec, tert): marker = STRMarkerObject("DYS389II", sequence, uas=False, kit="forenseq") - assert marker.annotation == bracketed + assert marker.convert == bracketed assert marker.canonical == conc assert marker.designation == (lus, sec, tert) @@ -904,7 +900,7 @@ def test_strobj_DYS389II(sequence, bracketed, conc, lus, sec, tert): ) def test_strobj_D18S51(sequence, bracketed, lus, sec, tert, flank_5, flank_3, kit): marker = STRMarkerObject("D18S51", sequence, uas=False, kit=kit) - assert marker.annotation == bracketed + assert marker.convert == bracketed assert marker.designation == (lus, sec, tert) assert marker.flank_5p == flank_5 assert marker.flank_3p == flank_3 @@ -1441,7 +1437,7 @@ def test_strobj_D18S51(sequence, bracketed, lus, sec, tert, flank_5, flank_3, ki ) def test_new_power_config(locus, sequence, bracketed, conc, lus, sec, tert, flank_5, flank_3): marker = STRMarkerObject(locus, sequence, uas=False, kit="powerseq") - assert marker.annotation == bracketed + assert marker.convert == bracketed assert str(marker.canonical) == conc assert marker.designation == (lus, sec, tert) assert marker.flank_5p == flank_5 diff --git a/lusSTR/tests/test_repeat.py b/lusSTR/tests/test_repeat.py index c90077d9..385cb795 100644 --- a/lusSTR/tests/test_repeat.py +++ b/lusSTR/tests/test_repeat.py @@ -11,10 +11,15 @@ # ------------------------------------------------------------------------------------------------- import lusSTR -from lusSTR.repeat import collapse_tandem_repeat, collapse_all_repeats, repeat_copy_number -from lusSTR.repeat import split_by_n, get_blocks, reverse_complement, reverse_complement_bracketed -from lusSTR.repeat import collapse_repeats_by_length, collapse_repeats_by_length_flanks 
-from lusSTR.repeat import sequence_to_bracketed_form +from lusSTR.scripts.repeat import collapse_tandem_repeat, collapse_all_repeats, repeat_copy_number +from lusSTR.scripts.repeat import ( + split_by_n, + get_blocks, + reverse_complement, + reverse_complement_bracketed, +) +from lusSTR.scripts.repeat import collapse_repeats_by_length, collapse_repeats_by_length_flanks +from lusSTR.scripts.repeat import sequence_to_bracketed_form import pytest diff --git a/lusSTR/tests/test_snps.py b/lusSTR/tests/test_snps.py index 5fb9ef21..8346b526 100644 --- a/lusSTR/tests/test_snps.py +++ b/lusSTR/tests/test_snps.py @@ -20,6 +20,7 @@ from tempfile import NamedTemporaryFile +@pytest.mark.snps def test_uas_all(tmp_path): inputdb = data_file("snps") exp_out = data_file("snps_uas_all.txt") @@ -30,6 +31,7 @@ def test_uas_all(tmp_path): assert filecmp.cmp(exp_out, obs_out) is True +@pytest.mark.snps @pytest.mark.parametrize("type, lines", [("i", 189), ("p", 157)]) def test_uas_type(type, lines, tmp_path): inputdb = data_file("snps") @@ -41,6 +43,7 @@ def test_uas_type(type, lines, tmp_path): assert len(fh.readlines()) == lines +@pytest.mark.snps def test_sr_all(tmp_path): inputdb = data_file("snps") exp_out = data_file("snps_sr_all.txt") @@ -54,6 +57,7 @@ def test_sr_all(tmp_path): assert filecmp.cmp(exp_out_full, obs_out_full) is True +@pytest.mark.snps @pytest.mark.parametrize("type, lines, full_lines", [("i", 181, 2152), ("p", 158, 2982)]) def test_sr_type(type, lines, full_lines, tmp_path): inputdb = data_file("snps") diff --git a/lusSTR/tests/test_suite.py b/lusSTR/tests/test_suite.py index e1156582..e23bf485 100644 --- a/lusSTR/tests/test_suite.py +++ b/lusSTR/tests/test_suite.py @@ -15,101 +15,112 @@ import pandas as pd import pytest import lusSTR -from lusSTR.marker import STRMarkerObject -from lusSTR.repeat import reverse_complement +from lusSTR.scripts.marker import STRMarkerObject +from lusSTR.scripts.repeat import reverse_complement from lusSTR.tests import data_file 
+from pathlib import Path +from pkg_resources import resource_filename import re from tempfile import NamedTemporaryFile +import shutil +import yaml def test_split_sequence_into_two_strings(): sequence = "TAGATAGATAGATGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTGTG" reverse_comp_sequence = reverse_complement(sequence) repeat_for_split = "CACA" - seq1, seq2 = lusSTR.annot.split_sequence_into_two_strings( + seq1, seq2 = lusSTR.scripts.repeat.split_sequence_into_two_strings( reverse_comp_sequence, repeat_for_split ) assert seq1 == "CACACACACACA" assert seq2 == "CCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCATCTATCTATCTA" -def test_annotate_uas(): - with NamedTemporaryFile() as outfile: - os.unlink(outfile.name) - inputfile = data_file("2800M_formatted_uas.csv") - testanno = data_file("2800M_uas_anno.txt") - arglist = ["annotate", inputfile, "-o", outfile.name, "--kit", "forenseq", "--uas"] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.annot.main(args) - assert filecmp.cmp(testanno, outfile.name) is True - - @pytest.mark.parametrize( - "infile, len_sum, len_uncom, xy_len_sum, xy_len_uncom, kit", + "infile, len_sum, len_uncom, xy_len_sum, xy_len_uncom, pwrseq", [ - ("testformat_sr.csv", 897, 913, 9701, 16108, "forenseq"), - ("powerseq.csv", 353, 441, 256, 303, "powerseq"), + ("testformat_sr", 897, 913, 9701, 16108, False), + ("powerseq", 353, 441, 256, 303, True), ], ) -def test_annotate_full_nocombine(infile, len_sum, len_uncom, xy_len_sum, xy_len_uncom, kit): - inputfile = data_file(infile) - with NamedTemporaryFile() as outfile: - arglist_nocomb = [ - "annotate", - inputfile, +def test_convert_full_nocombine( + infile, len_sum, len_uncom, xy_len_sum, xy_len_uncom, pwrseq, tmp_path +): + str_path = str(tmp_path / "WD") + inputfile = data_file(f"{infile}.csv") + inputfile_sex = data_file(f"{infile}_sexloci.csv") + obs_out = f"{infile}.csv" + obs_sex_out = f"{infile}_sexloci.csv" + if pwrseq is False: + arglist = [ + "config", + "-w", 
+ str_path, + "--straitrazor", + "--nocombine", + "--sex", "-o", - outfile.name, - "--kit", - kit, + infile, + "--input", + "WD", + ] + else: + arglist = [ + "config", + "-w", + str_path, + "--straitrazor", "--nocombine", - "--include-sex", + "--sex", + "-o", + infile, + "--input", + "WD", + "--powerseq", ] - args_nocomb = lusSTR.cli.get_parser().parse_args(arglist_nocomb) - lusSTR.annot.main(args_nocomb) - outfile_name = os.path.splitext(outfile.name)[0] - with open(f"{outfile_name}_no_combined_reads.txt", "r") as fh: - assert len(fh.readlines()) == len_uncom - with open(f"{outfile_name}_sexloci_no_combined_reads.txt", "r") as fh: - assert len(fh.readlines()) == xy_len_uncom - arglist_comb = ["annotate", inputfile, "-o", outfile.name, "--kit", kit, "--include-sex"] - args_comb = lusSTR.cli.get_parser().parse_args(arglist_comb) - lusSTR.annot.main(args_comb) - with open(outfile.name, "r") as fh: - assert len(fh.readlines()) == len_sum - with open(f"{outfile_name}_sexloci.txt", "r") as fh: - assert len(fh.readlines()) == xy_len_sum - - -def test_flank_anno(): - with NamedTemporaryFile(suffix=".txt") as outfile: - inputfile = data_file("test_flank.csv") - testflanks = data_file("testflanks_flanks_anno.txt") - arglist = ["annotate", inputfile, "-o", outfile.name, "--kit", "forenseq"] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.annot.main(args) - outfile_name = os.path.splitext(outfile.name)[0] - outfile_name_output = f"{outfile_name}_flanks_anno.txt" - assert filecmp.cmp(testflanks, outfile_name_output) is True - - -def test_annotate_combine(): - with NamedTemporaryFile() as outfile: - inputfile = data_file("Flanks_testing_file.csv") - arglist = ["annotate", inputfile, "-o", outfile.name, "--kit", "forenseq"] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.annot.main(args) - with open(outfile.name, "r") as fh: - assert len(fh.readlines()) == 952 - - -def test_FGA_short_seq(): - with NamedTemporaryFile(suffix=".txt") as outfile: - input 
= data_file("test_FGA_short_seq.csv") - arglist = ["annotate", input, "-o", outfile.name, "--kit", "forenseq"] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.annot.main(args) - with open(outfile.name, "r") as fh: - assert len(fh.readlines()) == 0 + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile_sex, os.path.join(str_path, obs_sex_out)) + shutil.copyfile(inputfile, os.path.join(str_path, obs_out)) + convert_arglist = ["strs", "convert", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(convert_arglist)) + with open(f"{str_path}/{infile}_no_combined_reads.txt", "r") as fh: + assert len(fh.readlines()) == len_uncom + with open(f"{str_path}/{infile}_sexloci_no_combined_reads.txt", "r") as fh: + assert len(fh.readlines()) == xy_len_uncom + with open(f"{str_path}/{infile}.txt", "r") as fh: + assert len(fh.readlines()) == len_sum + with open(f"{str_path}/{infile}_sexloci.txt", "r") as fh: + assert len(fh.readlines()) == xy_len_sum + + +def test_flanks(tmp_path): + inputfile = data_file("test_flank.csv") + exp_out = data_file("testflanks_flanks.txt") + str_path = str(tmp_path / "WD") + obs_out = str(tmp_path / "WD/testflanks_flanks.txt") + arglist = ["config", "-w", str_path, "-o", "testflanks", "--straitrazor", "--input", "WD"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile, os.path.join(str_path, "testflanks.csv")) + convert_arglist = ["strs", "convert", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(convert_arglist)) + assert filecmp.cmp(exp_out, obs_out) is True + + +@pytest.mark.parametrize( + "input, exp_length", [("Flanks_testing_file.csv", 952), ("test_FGA_short_seq.csv", 2)] +) +def test_convert_combine(input, exp_length, tmp_path): + inputfile = data_file(input) + str_path = str(tmp_path / "WD") + obs_out = str(tmp_path / "WD/testflanks.txt") + arglist = ["config", "-w", str_path, "-o", "testflanks", "--straitrazor", 
"--input", "WD"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile, os.path.join(str_path, "testflanks.csv")) + convert_arglist = ["strs", "convert", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(convert_arglist)) + with open(obs_out, "r") as fh: + assert len(fh.readlines()) == exp_length @pytest.mark.parametrize( @@ -136,83 +147,143 @@ def test_indel_flag(locus, sequence, uas, kit, output): assert marker.indel_flag == output -def test_powerseq_flanking_anno(): - with NamedTemporaryFile(suffix=".txt") as outfile: - input = data_file("powerseq_flanking_anno_test.csv") - test_powerseq = data_file("powerseq_flanking_anno_test_flanks_anno.txt") - arglist = ["annotate", input, "-o", outfile.name, "--kit", "powerseq"] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.annot.main(args) - outfile_name = os.path.splitext(outfile.name)[0] - outfile_name_output = f"{outfile_name}_flanks_anno.txt" - assert filecmp.cmp(test_powerseq, outfile_name_output) is True +def test_powerseq_flanks(tmp_path): + inputfile = data_file("powerseq_flanking_convert_test.csv") + exp_out = data_file("powerseq_flanking_convert_test_flanks.txt") + str_path = str(tmp_path / "WD") + obs_out = str(tmp_path / "WD/powerseq_flanks.txt") + arglist = [ + "config", + "-w", + str_path, + "-o", + "powerseq", + "--straitrazor", + "--input", + "WD", + "--powerseq", + ] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile, os.path.join(str_path, "powerseq.csv")) + convert_arglist = ["strs", "convert", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(convert_arglist)) + assert filecmp.cmp(exp_out, obs_out) is True -def test_annotate_uas_sexloci(): - with NamedTemporaryFile() as outfile: - os.unlink(outfile.name) - inputfile = data_file("testformat_uas.csv") - testanno = data_file("testformat_uas_sexloci.txt") - arglist = [ - "annotate", - inputfile, - "-o", - outfile.name, - 
"--kit", - "forenseq", - "--uas", - "--include-sex", - ] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.annot.main(args) - outfile_name = os.path.splitext(outfile.name)[0] - outfile_name_output = f"{outfile_name}_sexloci.txt" - assert filecmp.cmp(testanno, outfile_name_output) is True +def test_convert_uas_sexloci(tmp_path): + str_path = str(tmp_path / "WD") + inputfile = data_file("testformat_uas.csv") + inputfile_sex = data_file("testformat_uas_sexloci.csv") + exp_sex_out = data_file("testformat_uas_sexloci.txt") + obs_sex_out = str(tmp_path / "WD/testformatuas_sexloci.txt") + arglist = ["config", "-w", str_path, "-o", "testformatuas", "--sex", "--input", "WD"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile_sex, os.path.join(str_path, "testformatuas_sexloci.csv")) + shutil.copyfile(inputfile, os.path.join(str_path, "testformatuas.csv")) + convert_arglist = ["strs", "convert", "-w", str_path] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(convert_arglist)) + assert filecmp.cmp(exp_sex_out, obs_sex_out) is True @pytest.mark.parametrize( - "inputfile, testoutput, flank_output, kit", + "input, testoutput, flank_output, kit", [ ( - "testformat_sr.csv", + "testformat_sr", "testformat_sr_sexloci.txt", - "testformat_sr_sexloci_flanks_anno.txt", + "testformat_sr_sexloci_flanks.txt", "forenseq", ), ( - "powerseq_flanking_anno_test.csv", - "powerseq_flanking_anno_test_sexloci.txt", - "powerseq_flanking_anno_test_sexloci_flanks_anno.txt", + "powerseq_flanking_convert_test", + "powerseq_flanking_convert_test_sexloci.txt", + "powerseq_flanking_convert_test_sexloci_flanks.txt", "powerseq", ), ], ) -def test_annotate_sr_sexloci(inputfile, testoutput, flank_output, kit): - with NamedTemporaryFile() as outfile: - os.unlink(outfile.name) - inputfile = data_file(inputfile) - testanno = data_file(testoutput) - flankanno = data_file(flank_output) - arglist = ["annotate", inputfile, "-o", outfile.name, "--kit", kit, 
"--include-sex"] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.annot.main(args) - outfile_name = os.path.splitext(outfile.name)[0] - outfile_name_output = f"{outfile_name}_sexloci.txt" - assert filecmp.cmp(testanno, outfile_name_output) is True - flank_outfile = f"{outfile_name}_sexloci_flanks_anno.txt" - assert filecmp.cmp(flankanno, flank_outfile) is True - - -@pytest.mark.parametrize("flag", "sex", [("", ""), ("--include-sex", "_sexloci")]) -def separate_output(tmp_path, flag, sex): - inputfile = data_file("UAS_bulk_test.csv") - outputfile = str(tmp_path / "UAS_bulk_test.txt") - arglist = ["annotate", inputfile, "-o", outputfile, "--separate", flag] - args = lusSTR.cli.get_parser().parse_args(arglist) - lusSTR.annot.main(args) - assert os.file.exists( - f"{tmp_path}/Separated_lusstr_Files/UAS_bulk_test/Positive_Control{sex}.txt" - ) - assert os.file.exists( - f"{tmp_path}/Separated_lusstr_Files/UAS_bulk_test/Positive_Control2{sex}.txt" - ) +def test_convert_sr_sexloci(input, testoutput, flank_output, kit, tmp_path): + str_path = str(tmp_path / "WD") + inputfile = data_file(f"{input}.csv") + inputfile_sex = data_file(f"{input}_sexloci.csv") + exp_sex_out = data_file(testoutput) + exp_sex_flank_out = data_file(flank_output) + obs_sex_out = str(tmp_path / "WD/testformatsr_sexloci.txt") + obs_sex_flank_out = str(tmp_path / "WD/testformatsr_sexloci_flanks.txt") + if kit == "forenseq": + arglist = [ + "config", + "-w", + str_path, + "-o", + "testformatsr", + "--sex", + "--input", + "WD", + "--straitrazor", + ] + else: + arglist = [ + "config", + "-w", + str_path, + "-o", + "testformatsr", + "--sex", + "--input", + "WD", + "--straitrazor", + "--powerseq", + ] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + shutil.copyfile(inputfile_sex, os.path.join(str_path, "testformatsr_sexloci.csv")) + shutil.copyfile(inputfile, os.path.join(str_path, "testformatsr.csv")) + convert_arglist = ["strs", "convert", "-w", str_path] + 
lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(convert_arglist)) + assert filecmp.cmp(exp_sex_out, obs_sex_out) is True + assert filecmp.cmp(exp_sex_flank_out, obs_sex_flank_out) is True + + +def test_config(tmp_path): + obs_config = str(tmp_path / "config.yaml") + exp_config = resource_filename("lusSTR", "data/config.yaml") + arglist = ["config", "-w", str(tmp_path)] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + assert os.path.exists(obs_config) + + +def test_config_settings(tmp_path): + obs_config = str(tmp_path / "config.yaml") + arglist = ["config", "-w", str(tmp_path), "--straitrazor", "--reference", "--ce"] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + with open(obs_config, "r") as file: + data = yaml.safe_load(file) + assert data["uas"] is False + assert data["data_type"] == "ce" + assert data["profile_type"] == "reference" + + +@pytest.mark.parametrize( + "command, output, format_out, convert_out, all_out", + [ + ("format", "lusstr_output.csv", True, False, False), + ("convert", "lusstr_output.txt", True, True, False), + ("all", "lusstr_output/Positive_Control_evidence_ngs.csv", True, True, True), + ], +) +def test_snakemake(command, output, format_out, convert_out, all_out, tmp_path): + config = str(tmp_path / "config.yaml") + inputfile = data_file("UAS_bulk_input/Positive Control Sample Details Report 2315.xlsx") + exp_output = data_file(output) + obs_output = str(tmp_path / output) + obs_format_output = str(tmp_path / "lusstr_output.csv") + obs_convert_output = str(tmp_path / "lusstr_output.txt") + obs_all_output = str(tmp_path / "lusstr_output/") + arglist = ["config", "-w", str(tmp_path), "--input", inputfile] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) + snakemake_arglist = ["strs", command, "-w", str(tmp_path)] + lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(snakemake_arglist)) + assert os.path.exists(obs_format_output) is format_out + assert os.path.exists(obs_convert_output) is 
convert_out + assert os.path.exists(obs_all_output) is all_out + assert filecmp.cmp(exp_output, obs_output) is True diff --git a/lusSTR/workflows/snps.smk b/lusSTR/workflows/snps.smk new file mode 100644 index 00000000..e80020a7 --- /dev/null +++ b/lusSTR/workflows/snps.smk @@ -0,0 +1,120 @@ +import glob +import lusSTR +import openpyxl +import os +import pandas as pd +from pathlib import Path +import re + + +## placeholder until I update for snps + +configfile: "config.yaml" +output_name = config["output"] +input_name = config["samp_input"] +software = config["output_type"] +prof = config["profile_type"] +data = config["data_type"] +filter_sep = config["filter_sep"] + + +def get_sample_IDs(input, uas, output, software, separate): + file_ext = ".xlsx" if uas is True else ".txt" + if software == "efm" and separate is False: + return os.path.basename(output) + else: + if uas is True: + if os.path.isdir(input): + files = glob.glob(os.path.join(input, f"[!~]*{file_ext}")) + else: + files = input + ID_list = get_uas_ids(files) + else: + if os.path.isdir(input): + files = glob.glob(os.path.join(input, f"[!~]*{file_ext}")) + else: + files = input + files = [sub.replace(dir, "") for sub in files] + ID_list = [sub.replace(file_ext, "") for sub in files] + return ID_list + + +def get_uas_ids(files): + samplelist = [] + if isinstance(files, list): + for filename in sorted(files): + if "Sample Details" not in filename: + continue + sampleID = parse_sample_details(filename) + samplelist.append(sampleID) + else: + samplelist = parse_sample_details(files) + return samplelist + + +def parse_sample_details(filename): + file = openpyxl.load_workbook(filename) + file_sheet = file["Autosomal STRs"] + table = pd.DataFrame(file_sheet.values) + sampleID = re.sub(" ", "_", table.iloc[2, 1]) + return sampleID + + +rule all: + input: + expand("{name}.csv", name=output_name), + expand("{name}.txt", name=output_name), + expand( + "{outdir}/{samplename}_{prof_t}_{data_t}.csv", 
outdir=output_name, + samplename=get_sample_IDs(input_name, config["uas"], output_name, software, + filter_sep), prof_t=prof, data_t=data + ) + + +rule format: + input: + expand("{samp_input}", samp_input=input_name) + output: + expand("{name}.csv", name=output_name) + params: + uas=config["uas"], + sex=config["sex"] + script: + lusSTR.wrapper("format") + + +rule annotate: + input: + rules.format.output + output: + expand("{name}.txt", name=output_name) + params: + uas=config["uas"], + sex=config["sex"], + combine=config["nocombine"], + separate=config["separate"], + kit=config["kit"] + script: + lusSTR.wrapper("annot") + + +rule filter: + input: + rules.annotate.output + output: + expand( + "{outdir}/{samplename}_{prof_t}_{data_t}.csv", outdir=output_name, + samplename=get_sample_IDs(input_name, config["uas"], output_name, software, + filter_sep), prof_t=prof, data_t=data + ) + params: + output_type=config["output_type"], + profile_type=config["profile_type"], + data_type=config["data_type"], + output_dir=config["output"], + info=config["info"], + filter_sep=config["filter_sep"], + filters=config["nofilters"], + script: + lusSTR.wrapper("filter") + \ No newline at end of file diff --git a/lusSTR/workflows/strs.smk b/lusSTR/workflows/strs.smk new file mode 100644 index 00000000..fd14c43e --- /dev/null +++ b/lusSTR/workflows/strs.smk @@ -0,0 +1,130 @@ +import glob +import lusSTR +import openpyxl +import os +import pandas as pd +from pathlib import Path +import re + + +configfile: "config.yaml" +output_name = config["output"] +input_name = config["samp_input"] +software = config["output_type"] +prof = config["profile_type"] +data = config["data_type"] +separate = config["separate"] + + +def get_sample_IDs(input, uas, output, software, separate): + convert_out = f"{output}.txt" + format_out = f"{output}.csv" + if software == "efm" and separate is False: + ID_list = os.path.basename(output) + elif os.path.exists(convert_out): + ID_list = get_existing_IDs(convert_out, 
"\t") + elif os.path.exists(format_out): + ID_list = get_existing_IDs(format_out, ",") + else: + file_ext = ".xlsx" if uas is True else ".txt" + if uas is True: + if os.path.isdir(input): + files = glob.glob(os.path.join(input, f"[!~]*{file_ext}")) + else: + files = input + ID_list = get_uas_ids(files) + else: + if os.path.isdir(input): + files = glob.glob(os.path.join(input, f"[!~]*{file_ext}")) + files = [sub.replace(input, "") for sub in files] + ID_list = [sub.replace(file_ext, "") for sub in files] + else: + files = os.path.basename(input) + ID_list = files.replace(file_ext, "") + return ID_list + + +def get_existing_IDs(infile, separator): + data = pd.read_csv(infile, sep=separator) + IDs = data["SampleID"].unique() + return IDs + + +def get_uas_ids(files): + samplelist = [] + if isinstance(files, list): + for filename in sorted(files): + if "Sample Details" not in filename: + continue + sampleID = parse_sample_details(filename) + samplelist.append(sampleID) + else: + samplelist = parse_sample_details(files) + return samplelist + + +def parse_sample_details(filename): + file = openpyxl.load_workbook(filename) + file_sheet = file["Autosomal STRs"] + table = pd.DataFrame(file_sheet.values) + sampleID = re.sub(" ", "_", table.iloc[2, 1]) + return sampleID + + +rule all: + input: + expand("{name}.csv", name=output_name), + expand("{name}.txt", name=output_name), + expand( + "{outdir}/{samplename}_{prof_t}_{data_t}.csv", outdir=output_name, + samplename=get_sample_IDs(input_name, config["uas"], output_name, software, + separate), prof_t=prof, data_t=data + ) + + +rule format: + input: + expand("{samp_input}", samp_input=input_name) + output: + expand("{name}.csv", name=output_name) + params: + uas=config["uas"], + sex=config["sex"] + script: + lusSTR.wrapper("format") + + +rule convert: + input: + rules.format.output + output: + expand("{name}.txt", name=output_name) + params: + uas=config["uas"], + sex=config["sex"], + nocombine=config["nocombine"], + 
kit=config["kit"] + script: + lusSTR.wrapper("convert") + + +rule filter: + input: + rules.convert.output + output: + expand( + "{outdir}/{samplename}_{prof_t}_{data_t}.csv", outdir=output_name, + samplename=get_sample_IDs(input_name, config["uas"], output_name, software, + separate), prof_t=prof, data_t=data + ) + params: + output_type=config["output_type"], + profile_type=config["profile_type"], + data_type=config["data_type"], + output_dir=config["output"], + info=config["info"], + separate=config["separate"], + filters=config["nofilters"], + script: + lusSTR.wrapper("filter") + \ No newline at end of file diff --git a/lusSTR/annot.py b/lusSTR/wrappers/convert.py similarity index 61% rename from lusSTR/annot.py rename to lusSTR/wrappers/convert.py index 6356b2c0..e804fd17 100644 --- a/lusSTR/annot.py +++ b/lusSTR/wrappers/convert.py @@ -15,38 +15,17 @@ import os import pandas as pd import re - import lusSTR -from lusSTR.repeat import collapse_all_repeats, collapse_repeats_by_length -from lusSTR.repeat import sequence_to_bracketed_form, split_by_n -from lusSTR.repeat import reverse_complement, reverse_complement_bracketed -from pkg_resources import resource_filename - - -def get_str_metadata_file(): - return resource_filename("lusSTR", "str_markers.json") +from lusSTR.scripts.marker import get_str_metadata_file, STRMarkerObject +from lusSTR.scripts.repeat import collapse_all_repeats, collapse_repeats_by_length +from lusSTR.scripts.repeat import sequence_to_bracketed_form, split_by_n +from lusSTR.scripts.repeat import reverse_complement, reverse_complement_bracketed with open(get_str_metadata_file(), "r") as fh: str_marker_data = json.load(fh) -def split_sequence_into_two_strings(sequence, repeat_for_split): - """ - Function to split a sequence into two separate strings at a specified repeat unit. 
- """ - last = 0 - prev = 0 - for m in re.finditer(repeat_for_split, sequence): - if m.start() == prev or m.start() == last or prev == 0: - prev = m.end() - else: - last = m.end() - first_string = sequence[:prev] - second_string = sequence[prev:] - return first_string, second_string - - def format_table(input, uas=False, kit="forenseq"): """ Function to format final output table and the flanking report (if necessary). @@ -117,7 +96,7 @@ def format_table(input, uas=False, kit="forenseq"): flanks_list.append(flank_summary) continue - marker = lusSTR.marker.STRMarkerObject(locus, sequence, uas=uas, kit=kit) + marker = STRMarkerObject(locus, sequence, uas=uas, kit=kit) summary = [sampleid, project, analysis, locus] + marker.summary + [reads] list_of_lists.append(summary) @@ -131,7 +110,7 @@ def format_table(input, uas=False, kit="forenseq"): marker.canonical, marker.sequence, marker.flank_5p, - marker.annotation, + marker.convert, marker.flank_3p, marker.indel_flag, ] @@ -191,62 +170,46 @@ def sort_table(table): return sorted_table -def indiv_files(table, input_dir, ext): - output_dir = f"Separated_lusstr_Files/{input_dir}" - try: - os.mkdir(output_dir) - except FileExistsError: - pass - for samp in table["SampleID"].unique(): - new_df = table[table["SampleID"] == samp] - new_df.to_csv(f"{output_dir}/{samp}{ext}", sep="\t", index=False) - - -def main(args): - if args.separate and os.path.exists("Separated_lusstr_Files") is False: - os.mkdir("Separated_lusstr_Files") - output_name = os.path.splitext(args.out)[0] - input_name = os.path.splitext(args.input)[0] - autosomal_final_table, autosomal_flank_table, columns = format_table( - args.input, args.uas, args.kit - ) - if args.sex: +def main(input, out, kit, uas, sex, nocombine): + input = str(input) + out = str(out) + output_name = os.path.splitext(out)[0] + input_name = os.path.splitext(input)[0] + autosomal_final_table, autosomal_flank_table, columns = format_table(input, uas, kit) + if sex: sex_final_table, 
sex_flank_table, columns = format_table( - f"{input_name}_sexloci.csv", args.uas, args.kit + f"{input_name}_sexloci.csv", uas, kit ) - if not args.uas: - sex_flank_table.to_csv(f"{output_name}_sexloci_flanks_anno.txt", sep="\t", index=False) - if args.combine: - if not sex_final_table.empty: - sex_final_table = combine_reads(sex_final_table, columns) - if args.separate: - indiv_files(sex_final_table, input_name, "_sexloci.txt") - else: - sex_final_table.to_csv(f"{output_name}_sexloci.txt", sep="\t", index=False) - else: - if args.separate: - indiv_files(sex_final_table, input_name, "_sexloci_no_combined_reads.txt") - sex_final_table.to_csv(f"{output_name}_sexloci_no_combined_reads.txt", index=False) - else: - if args.separate: - indiv_files(sex_final_table, input_name, "_sexloci.txt") - else: + if not uas: + if not sex_final_table.empty: + sex_flank_table.to_csv(f"{output_name}_sexloci_flanks.txt", sep="\t", index=False) + if nocombine: + sex_final_table.to_csv( + f"{output_name}_sexloci_no_combined_reads.txt", index=False + ) + sex_final_table = combine_reads(sex_final_table, columns) sex_final_table.to_csv(f"{output_name}_sexloci.txt", sep="\t", index=False) - if not args.uas: - autosomal_flank_table.to_csv(f"{output_name}_flanks_anno.txt", sep="\t", index=False) - if args.combine: - if not autosomal_final_table.empty: - autosomal_final_table = combine_reads(autosomal_final_table, columns) - if args.separate: - indiv_files(autosomal_final_table, input_name, ".txt") - else: - autosomal_final_table.to_csv(args.out, sep="\t", index=False) else: - autosomal_final_table.to_csv( - f"{output_name}_no_combined_reads.txt", sep="\t", index=False - ) + sex_final_table.to_csv(f"{output_name}_sexloci.txt", sep="\t", index=False) + if not uas: + if not autosomal_final_table.empty: + autosomal_flank_table.to_csv(f"{output_name}_flanks.txt", sep="\t", index=False) + if nocombine: + autosomal_final_table.to_csv( + f"{output_name}_no_combined_reads.txt", sep="\t", index=False + ) 
+ autosomal_final_table = combine_reads(autosomal_final_table, columns) + autosomal_final_table.to_csv(out, sep="\t", index=False) else: - if args.separate: - indiv_files(autosomal_final_table, input_name, ".txt") - else: - autosomal_final_table.to_csv(args.out, sep="\t", index=False) + autosomal_final_table.to_csv(out, sep="\t", index=False) + + +if __name__ == "__main__": + main( + snakemake.input, + snakemake.output, + kit=snakemake.params.kit, + uas=snakemake.params.uas, + sex=snakemake.params.sex, + nocombine=snakemake.params.nocombine, + ) diff --git a/lusSTR/filter.py b/lusSTR/wrappers/filter.py similarity index 85% rename from lusSTR/filter.py rename to lusSTR/wrappers/filter.py index 8e059d78..88c676f3 100644 --- a/lusSTR/filter.py +++ b/lusSTR/wrappers/filter.py @@ -14,7 +14,7 @@ from collections import defaultdict import json import lusSTR -from lusSTR.filter_settings import filters, flags +from lusSTR.scripts.filter_settings import filters, flags import numpy as np import os import pandas as pd @@ -56,7 +56,7 @@ def get_filter_metadata_file(): - return resource_filename("lusSTR", "filters.json") + return resource_filename("lusSTR", "data/filters.json") with open(get_filter_metadata_file(), "r") as fh: @@ -118,7 +118,7 @@ def EFM_output(profile, outfile, profile_type, separate=False): profile = profile[profile.allele_type != "BelowAT"] efm_profile = populate_efm_profile(profile) if separate: - write_sample_specific_efm_profiles(efm_profile, profile_type) + write_sample_specific_efm_profiles(efm_profile, profile_type, outfile) else: write_aggregate_efm_profile(efm_profile, profile_type, outfile) @@ -156,13 +156,13 @@ def populate_efm_profile(profile): return efm_profile -def write_sample_specific_efm_profiles(efm_profile, profile_type, outdir="Separated_EFM_Files"): - Path(outdir).mkdir(exist_ok=True) +def write_sample_specific_efm_profiles(efm_profile, profile_type, outdir): + Path(outdir).mkdir(parents=True, exist_ok=True) for sample in 
efm_profile.SampleName: - sample_profile = efm_profile[efm_profile.SampleName == sample] + sample_profile = efm_profile[efm_profile.SampleName == sample].reset_index(drop=True) sample_profile.dropna(axis=1, how="all", inplace=True) if profile_type == "evidence": - sample_profile.to_csv(f"Separated_EFM_Files/{sample}.csv", index=False) + sample_profile.to_csv(f"{outdir}/{sample}_evidence_ce.csv", index=False) else: num_alleles = (len(sample_profile.columns) - 2) / 2 if num_alleles > 2: @@ -175,18 +175,19 @@ def write_sample_specific_efm_profiles(efm_profile, profile_type, outdir="Separa for i in range(len(sample_profile)): if pd.isna(sample_profile.loc[i, "Allele2"]): sample_profile.loc[i, "Allele2"] = sample_profile.loc[i, "Allele1"] - sample_profile.iloc[:, :4].to_csv(f"Separated_EFM_Files/{id}.csv", index=False) + sample_profile.iloc[:, :4].to_csv(f"{outdir}/{sample}_reference_ce.csv", index=False) def write_aggregate_efm_profile(efm_profile, profile_type, outfile): + Path(outfile).mkdir(parents=True, exist_ok=True) + name = os.path.basename(outfile) if profile_type == "evidence": - efm_profile.to_csv(outfile, index=False) + efm_profile.to_csv(f"{outfile}/{name}_evidence_ce.csv", index=False) else: for i in range(len(efm_profile)): if pd.isna(efm_profile.loc[i, "Allele2"]): efm_profile.loc[i, "Allele2"] = efm_profile.loc[i, "Allele1"] - prefix = outfile.replace(".csv", "") - efm_profile.iloc[:, :4].to_csv(f"{prefix}_reference.csv", index=False) + efm_profile.iloc[:, :4].to_csv(f"{outfile}/{name}_reference_ce.csv", index=False) def determine_max_num_alleles(allele_heights): @@ -199,6 +200,7 @@ def determine_max_num_alleles(allele_heights): def STRmix_output(profile, outdir, profile_type, data_type): + Path(outdir).mkdir(parents=True, exist_ok=True) if profile_type == "reference": filtered_df = profile[profile.allele_type == "real_allele"] else: @@ -221,7 +223,7 @@ def STRmix_output(profile, outdir, profile_type, data_type): for id in id_list: sample_df = 
strmix_profile[strmix_profile["SampleID"] == id].reset_index(drop=True) if profile_type == "evidence": - sample_df.iloc[:, 1:].to_csv(f"{outdir}/{id}_{data_type}.csv", index=False) + sample_df.iloc[:, 1:].to_csv(f"{outdir}/{id}_evidence_{data_type}.csv", index=False) else: reference_df = reference_table(sample_df, data_type) reference_df.to_csv(f"{outdir}/{id}_reference_{data_type}.csv", index=False) @@ -291,42 +293,47 @@ def format_ref_table(new_rows, sample_data, datatype): return sort_df -def main(args): - profile_type = args.profile +def main(input, output_type, profile_type, data_type, output_dir, info, separate, nofilters): + input = str(input) if profile_type not in ("evidence", "reference"): raise ValueError(f"unknown profile type '{profile_type}'") - data_type = args.data if data_type not in ("ce", "ngs"): raise ValueError(f"unknown data type '{data_type}'") - output_type = args.output if output_type not in ("efm", "strmix"): raise ValueError(f"unknown output type '{output_type}'") - full_df = pd.read_csv(args.input, sep="\t") - if args.out is None: - outpath = sys.stdout + full_df = pd.read_csv(input, sep="\t") + if output_dir is None: + raise ValueError("No output specified using --out.") else: - outpath = args.out - if args.nofilters: + outpath = output_dir + if nofilters: full_df["allele_type"] = "real_allele" - if args.output == "efm": - EFM_output(full_df, outpath, profile_type, args.separate) + if output_type == "efm": + EFM_output(full_df, outpath, profile_type, separate) else: STRmix_output(full_df, outpath, profile_type, data_type) else: dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])} final_df, flags_df = process_strs(dict_loc, data_type) if output_type == "efm": - EFM_output(final_df, outpath, profile_type, args.separate) + EFM_output(final_df, outpath, profile_type, separate) else: STRmix_output(final_df, outpath, profile_type, data_type) - if args.info: - if outpath != sys.stdout: - if output_type == "efm": - outputname = 
outpath.replace(".csv", "_") - else: - outputname = f"{outpath}/" - final_df.to_csv(f"{outputname}sequence_info.csv", index=False) - if not flags_df.empty: - flags_df.to_csv(f"{outputname}Flagged_Loci.csv", index=False) - else: - raise ValueError("No outfile provided. Please specify --out to create info file.") + if info: + name = os.path.basename(outpath) + final_df.to_csv(f"{outpath}/{name}_sequence_info.csv", index=False) + if not flags_df.empty: + flags_df.to_csv(f"{outpath}/{name}_Flagged_Loci.csv", index=False) + + +if __name__ == "__main__": + main( + snakemake.input, + output_type=snakemake.params.output_type, + profile_type=snakemake.params.profile_type, + data_type=snakemake.params.data_type, + output_dir=snakemake.params.output_dir, + info=snakemake.params.info, + separate=snakemake.params.separate, + nofilters=snakemake.params.filters, + ) diff --git a/lusSTR/format.py b/lusSTR/wrappers/format.py similarity index 91% rename from lusSTR/format.py rename to lusSTR/wrappers/format.py index 007d4200..41c209ef 100644 --- a/lusSTR/format.py +++ b/lusSTR/wrappers/format.py @@ -20,7 +20,7 @@ def uas_load(inpath, sexloci=False): - """Format a UAS Sample Details Report (.xlsx) for use with `lusSTR annotate`. + """Format a UAS Sample Details Report (.xlsx). The `inpath` argument can refer to a report file or a directory of report files. Any files without the `.xlsx` file extension are ignored. 
The `sexloci` argument determines whether X and @@ -69,7 +69,7 @@ def uas_format(infile, sexloci=False): def strait_razor_concat(inpath, sexloci=False): - """Format a directory of STRait Razor output files for use with `lusSTR annotate`.""" + """Format a directory of STRait Razor output files.""" locus_list = [ "CSF1PO", "D10S1248", @@ -186,14 +186,18 @@ def strait_razor_table(filename, analysisID, sexloci=False): return table -def main(args): - if args.uas: - results, sex_results = uas_load(args.input, args.sex) +def main(input, outfile, uas=True, sex=False): + if uas: + results, sex_results = uas_load(str(input), sex) else: - results, sex_results = strait_razor_concat(args.input, args.sex) - if args.out is None: - args.out = sys.stdout - results.to_csv(args.out, index=False) - if args.sex: - name = os.path.splitext(args.out)[0] + results, sex_results = strait_razor_concat(str(input), sex) + if outfile is None: + outfile = sys.stdout + results.to_csv(str(outfile), index=False) + if sex: + name = os.path.splitext(str(outfile))[0] sex_results.to_csv(f"{name}_sexloci.csv", index=False) + + +if __name__ == "__main__": + main(snakemake.input, snakemake.output, uas=snakemake.params.uas, sex=snakemake.params.sex) diff --git a/setup.py b/setup.py index 64ddca4b..1fde6f29 100755 --- a/setup.py +++ b/setup.py @@ -10,32 +10,34 @@ # Development Center. 
# ------------------------------------------------------------------------------------------------- +import glob from setuptools import setup import versioneer -desc = "Tool for converting NGS sequence data of forensic STR loci to various annotation styles" +desc = "Tool for converting NGS sequence data of forensic STR loci to various sequence representations and allele designations" setup( name="lusSTR", version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), description=desc, - packages=["lusSTR", "lusSTR.tests"], + packages=["lusSTR", "lusSTR.cli", "lusSTR.tests"], package_data={ "lusSTR": [ - "lusSTR/str_markers.json", - "lusSTR/snp_data.json", + "lusSTR/data/*", "lusSTR/tests/data/*", "lusSTR/tests/data/STRait_Razor_test_output/*", "lusSTR/tests/data/UAS_bulk_input/*", "lusSTR/tests/data/snps/*", "lusSTR/tests/data/RU_stutter_test/*", - "lusSTR/filters.json", "lusSTR/tests/data/NGS_stutter_test/*", + "lusSTR/workflows/*", + "lusSTR/wrappers/*", ] }, include_package_data=True, - install_requires=["pandas>=1.0", "openpyxl>=3.0.6"], - entry_points={"console_scripts": ["lusstr = lusSTR.__main__:main"]}, + install_requires=["pandas>=1.0,<2.0", "openpyxl>=3.0.6", "snakemake>=7.22.0", "pyyaml>=6.0"], + entry_points={"console_scripts": ["lusstr = lusSTR.cli:main"]}, + scripts=glob.glob("lusSTR/scripts/*"), classifiers=[ "Environment :: Console", "Framework :: IPython",