From 60ef14748d56666e6f087995102b29429c1558af Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Thu, 10 Oct 2024 16:31:47 -0400 Subject: [PATCH 1/3] Begin refactor --- Makefile | 4 +- lusSTR/__init__.py | 3 +- lusSTR/cli/__init__.py | 29 +- lusSTR/cli/config.py | 2 - lusSTR/cli/gui.py | 822 +++++++++++--------------------- lusSTR/cli/snps.py | 1 - lusSTR/cli/strs.py | 1 - lusSTR/gui/__init__.py | 11 + lusSTR/gui/app.py | 11 + lusSTR/gui/docs.py | 94 ++++ lusSTR/gui/select.py | 44 ++ lusSTR/wrappers/snps_convert.py | 8 +- 12 files changed, 460 insertions(+), 570 deletions(-) create mode 100644 lusSTR/gui/__init__.py create mode 100644 lusSTR/gui/app.py create mode 100644 lusSTR/gui/docs.py create mode 100644 lusSTR/gui/select.py diff --git a/Makefile b/Makefile index 3a97755..f125014 100755 --- a/Makefile +++ b/Makefile @@ -10,11 +10,11 @@ test: ## style: check code style style: - black --line-length=99 --check *.py lusSTR/scripts/*.py lusSTR/wrappers/*.py lusSTR/tests/test_*.py + black --line-length=99 --check *.py lusSTR/cli/*.py lusSTR/scripts/*.py lusSTR/wrappers/*.py lusSTR/tests/test_*.py ## format: auto-reformat code with Black format: - black --line-length=99 *.py lusSTR/cli/gui.py lusSTR/scripts/*.py lusSTR/wrappers/*.py lusSTR/tests/test_*.py + black --line-length=99 *.py lusSTR/cli/*.py lusSTR/scripts/*.py lusSTR/wrappers/*.py lusSTR/tests/test_*.py ## devenv: configure a development environment devenv: diff --git a/lusSTR/__init__.py b/lusSTR/__init__.py index ba56e6d..a811037 100644 --- a/lusSTR/__init__.py +++ b/lusSTR/__init__.py @@ -11,7 +11,8 @@ # ------------------------------------------------------------------------------------------------- import importlib.resources -from lusSTR import cli +from . import cli +from . 
import gui from lusSTR._version import get_versions __version__ = get_versions()["version"] diff --git a/lusSTR/cli/__init__.py b/lusSTR/cli/__init__.py index c2658b0..77d62f0 100644 --- a/lusSTR/cli/__init__.py +++ b/lusSTR/cli/__init__.py @@ -1,14 +1,23 @@ -import argparse -import importlib.resources +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2024, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from argparse import ArgumentParser +from importlib.resources import files +import lusSTR +from lusSTR.cli import config, gui, strs, snps import streamlit.web.cli as stcli import sys -import lusSTR -from lusSTR.cli import config -from lusSTR.cli import strs -from lusSTR.cli import snps -from lusSTR.cli import gui -mains = {"config": config.main, "strs": strs.main, "snps": snps.main, "gui": gui.main} +mains = {"config": config.main, "strs": strs.main, "snps": snps.main} subparser_funcs = { "config": config.subparser, @@ -24,7 +33,7 @@ def main(args=None): if args.subcmd is None: get_parser().parse_args(["-h"]) elif args.subcmd == "gui": - gui_path = importlib.resources.files("lusSTR") / "cli" / "gui.py" + gui_path = files("lusSTR") / "cli" / "gui.py" sys.argv = ["streamlit", "run", str(gui_path)] sys.exit(stcli.main()) else: @@ -34,7 +43,7 @@ def main(args=None): def get_parser(): - parser = argparse.ArgumentParser() + parser = ArgumentParser() parser.add_argument( "-v", "--version", 
action="version", version="lusSTR v" + lusSTR.__version__ ) diff --git a/lusSTR/cli/config.py b/lusSTR/cli/config.py index 7c78abb..e3f68df 100644 --- a/lusSTR/cli/config.py +++ b/lusSTR/cli/config.py @@ -10,9 +10,7 @@ # Development Center. # ------------------------------------------------------------------------------------------------- -import argparse import importlib.resources -import lusSTR import os from pathlib import Path import yaml diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py index e2fc94b..4ae34a9 100644 --- a/lusSTR/cli/gui.py +++ b/lusSTR/cli/gui.py @@ -9,14 +9,12 @@ # National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and # Development Center. # ------------------------------------------------------------------------------------------------- -################################################################# -# Importing Necessary Packages # -################################################################# from datetime import datetime import json import importlib.resources -from lusSTR.wrappers.filter import get_at, EFM_output, marker_plots, make_plot, STRmix_output +from lusSTR.gui import docs, select +from lusSTR.wrappers.filter import get_at, EFM_output, marker_plots, STRmix_output import math import numpy as np import pandas as pd @@ -30,11 +28,6 @@ import re -################################################################# -# Functions # -################################################################# - - def get_filter_metadata_file(): return importlib.resources.files("lusSTR") / "data/filters.json" @@ -59,40 +52,6 @@ def generate_config_file(config_data, working_directory, workflow_type): yaml.dump(config_data, file) -# ------------ Function for folder selection ------------------ # - - -def folder_picker_dialog(): - script_path = importlib.resources.files("lusSTR") / "scripts" / "folder_selector.py" - result = subprocess.run(["python", script_path], capture_output=True, text=True) - if 
result.returncode == 0: - folder_data = json.loads(result.stdout) - folder_path = folder_data.get("folder_path") - if folder_path: - return folder_path - else: - st.error("No folder selected") - else: - st.error("Error selecting folder") - - -# ------- Function for individual file selection -------------- # - - -def file_picker_dialog(): - script_path = importlib.resources.files("lusSTR") / "scripts" / "file_selector.py" - result = subprocess.run(["python", script_path], capture_output=True, text=True) - if result.returncode == 0: - file_data = json.loads(result.stdout) - file_path = file_data.get("file_path") - if file_path: - return file_path - else: - st.error("No folder selected") - else: - st.error("Error selecting folder") - - # ---- Function to validate prefix for output folder ---------- # @@ -105,81 +64,9 @@ def validate_prefix(prefix): return False -################################################################# -# Front-End Logic For Navigation Bar # -################################################################# - - def main(): - - # Page Layout (Theme and Fonts have been established in .streamlit/config.toml) - st.set_page_config(layout="wide", initial_sidebar_state="collapsed") - - # Creating Navigation Bar - - selected = option_menu( - menu_title=None, - options=["Home", "STRs", "SNPs", "How to Use", "Contact"], - icons=["house", "gear", "gear-fill", "book", "envelope"], - menu_icon="cast", - default_index=0, - orientation="horizontal", - ) - - if selected == "Home": - show_home_page() - - elif selected == "STRs": - show_STR_page() - - elif selected == "SNPs": - show_SNP_page() - - elif selected == "How to Use": - show_how_to_use_page() - - elif selected == "Contact": - show_contact_page() - - -##################################################################### -# lusSTR Home Page # -##################################################################### - - -def show_home_page(): - - image_path = importlib.resources.files("lusSTR") / "cli" / 
"logo.png" - - # CSS to hide full-screen button - hide_img_fs = """ - - """ - - # Define column layout for centering image - left_co, cent_co, last_co = st.columns([2.5, 8, 2.5]) - with cent_co: - st.image(str(image_path), use_column_width="auto") - - # Apply CSS to hide full-screen button - st.markdown(hide_img_fs, unsafe_allow_html=True) - - # -- Welcome Message Stuff - - st.markdown( - """ - lusSTR is an end-to-end workflow for processing human forensic data (STRs and SNPs) - derived from Next Generation Sequencing (NGS) data for use in probabilistic genotyping - software. For more information on lusSTR, visit our - [GitHub page](https://github.com/bioforensics/lusSTR). - """, - unsafe_allow_html=True, - ) - - st.info("Please Select One of the Tabs Above to Get Started on Processing Your Data!") + app = Application() + app.display() def df_on_change(locus): @@ -415,352 +302,6 @@ def create_settings(): st.session_state.output_type = config_settings["output_type"] -##################################################################### -# STR WORKFLOW # -##################################################################### - -##################################################################### -# Specify STR Settings Which Will Be Used to Generate Config File # -##################################################################### - - -def show_STR_page(): - - st.title("STR Workflow") - st.info( - "Please Select STR Settings Below for lusSTR! For Information Regarding the " - "Settings, See the How to Use Tab." 
- ) - - # Input File Specification - st.subheader("Input Files Selection") - - # Ask user if submitting a directory or individual file - st.info( - "Please Indicate If You Are Providing An Individual Input File or a Folder Containing " - "Multiple Input Files" - ) - input_option = st.radio( - "Select Input Option:", ("Individual File", "Folder with Multiple Files") - ) - - # Initialize session state if not already initialized - if "samp_input" not in st.session_state: - st.session_state.samp_input = None - - # Logic for Path Picker based on user's input option - - if input_option == "Folder with Multiple Files": - clicked = st.button("Select a Folder") - if clicked: - dirname = folder_picker_dialog() - st.session_state.samp_input = dirname - - else: - clicked_file = st.button("Select a File") - if clicked_file: - filename = file_picker_dialog() - st.session_state.samp_input = filename - - # Display The Selected Path - if st.session_state.samp_input: - st.text_input("Location Of Your Input File(s):", st.session_state.samp_input) - - # Store the Selected Path to Reference in Config - samp_input = st.session_state.samp_input - - ##################################################################### - # STR: Specify Working Directory # - ##################################################################### - - st.subheader("Output Folder Selection") - - col1, col2, col3, col4, col5 = st.columns(5) - - # Initialize session state if not already initialized - if "wd_dirname" not in st.session_state: - st.session_state.wd_dirname = None - - clicked_wd = col1.button("Select An Output Folder") - if clicked_wd: - wd = folder_picker_dialog() - st.session_state.wd_dirname = wd - - # Display selected path - if st.session_state.wd_dirname: - st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname) - - # Store Selected Path to Reference in Config - wd_dirname = st.session_state.wd_dirname - - ##################################################################### - 
# STR: General Software Settings to Generate Config File # - ##################################################################### - - st.subheader("General Settings") - - col1, col2, col3, col4, col5 = st.columns(5) - - if "analysis_software" not in st.session_state: - st.session_state.analysis_software = None - - st.session_state.analysis_software = { - "UAS": "uas", - "STRait Razor v3": "straitrazor", - "GeneMarker HTS": "genemarker", - }[ - col1.selectbox( - "Analysis Software", - options=["UAS", "STRait Razor v3", "GeneMarker HTS"], - help="Indicate the analysis software used prior to lusSTR.", - ) - ] - - if "custom_ranges" not in st.session_state: - st.session_state.custom_ranges = None - - st.session_state.custom_ranges = st.checkbox( - "Use Custom Sequence Ranges", - help="Check the box to use the specified custom sequence ranges as defined in the " - "str_markers.json file.", - ) - - if "sex" not in st.session_state: - st.session_state.sex = None - - st.session_state.sex = st.checkbox( - "Include X- and Y-STRs", - help="Check the box to include X- and Y-STRs, otherwise leave unchecked.", - ) - - if "kit" not in st.session_state: - st.session_state.kit = None - - st.session_state.kit = {"ForenSeq Signature Prep": "forenseq", "PowerSeq 46GY": "powerseq"}[ - col2.selectbox( - "Library Preparation Kit", - options=["ForenSeq Signature Prep", "PowerSeq 46GY"], - help="Specify the library preparation kit used to generate the sequences.", - ) - ] - - if "output" not in st.session_state: - st.session_state.output = None - - st.session_state.output = col3.text_input( - "Output File Name", - "lusstr_output", - help="Please specify a name for the created files. It can only contain alphanumeric " - "characters, underscores and hyphens. 
No spaces allowed.", - ) - - if "nocombine" not in st.session_state: - st.session_state.nocombine = None - - st.session_state.nocombine = st.checkbox( - "Do Not Combine Identical Sequences", - help="If using STRait Razor data, by default, identical sequences (after removing " - "flanking sequences) are combined and reads are summed. Checking this will not combine" - " identical sequences.", - ) - - ##################################################################### - # STR: Filter Settings to Generate Config File # - ##################################################################### - - st.subheader("Filter Settings") - - col1, col2, col3, col4, col5 = st.columns(5) - - if "output_type" not in st.session_state: - st.session_state.output_type = None - - st.session_state.output_type = { - "STRmix": "strmix", - "EuroForMix": "efm", - "MPSproto": "mpsproto", - }[ - col1.selectbox( - "Probabilistic Genotyping Software", - options=["STRmix", "EuroForMix", "MPSproto"], - help="Select which probabilistic genotyping software files to create", - ) - ] - - if "profile_type" not in st.session_state: - st.session_state.profile_type = None - - st.session_state.profile_type = {"Evidence": "evidence", "Reference": "reference"}[ - col2.selectbox( - "Profile Type", - options=["Evidence", "Reference"], - help="Select the file type (format) to create for the probabilistic genotyping " - "software.", - ) - ] - - if "data_type" not in st.session_state: - st.session_state.data_type = None - - st.session_state.data_type = {"Sequence": "ngs", "CE allele": "ce", "LUS+ allele": "lusplus"}[ - col3.selectbox( - "Data Type", - options=["Sequence", "CE allele", "LUS+ allele"], - help="Select the allele type used to determine sequence type (belowAT, stutter or " - "typed) and used in the final output file.", - ) - ] - - if "info" not in st.session_state: - st.session_state.info = None - - st.session_state.info = st.checkbox( - "Create Allele Information File", - value=True, - help="Create 
file containing information about each sequence, including sequence type " - "(belowAT, stutter or typed), stuttering sequence information and metrics involving " - "stutter and noise.", - ) - - if "separate" not in st.session_state: - st.session_state.separate = None - - st.session_state.separate = st.checkbox( - "Create Separate Files for Samples", - help="If checked, will create individual files for samples; If unchecked, will create " - "one file with all samples.", - ) - - if "nofilters" not in st.session_state: - st.session_state.nofilters = None - - st.session_state.nofilters = st.checkbox( - "Skip All Filtering Steps", - help="Filtering will not be performed but will still create EFM/MPSproto/STRmix output " - "files containing all sequences.", - ) - - if "strand" not in st.session_state: - st.session_state.strand = None - - st.session_state.strand = {"UAS Orientation": "uas", "Forward Strand": "forward"}[ - col4.selectbox( - "Strand Orientation", - options=["Forward Strand", "UAS Orientation"], - help="Indicates the strand orientation in which to report the sequence in the final " - "output table as some markers are reported in the UAS on the reverse strand. " - "Selecting the UAS Orientation will report those markers on the reverse strand while" - " the remaining will be reported on the forward strand. Selecting the Forward Strand " - "will report all markers on the forward strand orientation. 
This applies to STRmix " - "NGS only.", - ) - ] - - ##################################################################### - # STR: Generate Config File Based on Settings # - ##################################################################### - - # Submit Button Instance - if st.button("Run lusSTR"): - - # Check if all required fields are filled - if ( - st.session_state.analysis_software - and st.session_state.samp_input - and st.session_state.output - and st.session_state.wd_dirname - ): - - # Validate output prefix - if not validate_prefix(st.session_state.output): - st.warning( - "Please enter a valid output prefix. Only alphanumeric characters, " - "underscore, and hyphen are allowed." - ) - st.stop() # Stop execution if prefix is invalid - - # Display loading spinner (Continuing Process Checks Above Were Passed) - with st.spinner("Processing Your Data..."): - - # Construct config data - - config_data = { - "analysis_software": st.session_state.analysis_software, - "custom_ranges": st.session_state.custom_ranges, - "sex": st.session_state.sex, - "samp_input": st.session_state.samp_input, - "output": st.session_state.output, - "kit": st.session_state.kit, - "nocombine": st.session_state.nocombine, - "output_type": st.session_state.output_type, - "profile_type": st.session_state.profile_type, - "data_type": st.session_state.data_type, - "info": st.session_state.info, - "separate": st.session_state.separate, - "nofilters": st.session_state.nofilters, - "strand": st.session_state.strand, - } - - # Generate YAML config file - generate_config_file(config_data, st.session_state.wd_dirname, "STR") - - # Subprocess lusSTR commands - command = ["lusstr", "strs", "all"] - - # Specify WD to lusSTR - if wd_dirname: - command.extend(["-w", st.session_state.wd_dirname + "/"]) - - # Run lusSTR command in terminal - try: - subprocess.run(command, check=True) - st.success( - "Config File Generated and lusSTR Executed Successfully! 
Output Files" - "Have Been Saved to Your Designated Directory and Labeled with your " - "Specified Prefix" - ) - except subprocess.CalledProcessError as e: - st.error(f"Error: {e}") - st.info( - "Please make sure to check the 'How to Use' tab for common error " - "resolutions." - ) - - else: - st.warning( - "Please make sure to fill out all required fields (Analysis Software, Input " - "Directory or File, Prefix for Output, and Specification of Working Directory) " - "before submitting." - ) - st.write("---") - st.write( - "After running lusSTR, or if lusSTR has been run previously, the user may view and edit " - "the individual STR marker plots and data." - ) - st.write( - "If lusSTR has been previously run, only the above ```Output Folder``` containing the run" - " files needs to be specified. Other settings will be automatically loaded from the " - "config.yaml file within the specified folder." - ) - if "interactive" not in st.session_state: - st.session_state.interactive = None - if st.button("See Individual Marker Plots & Data") or st.session_state.interactive: - st.session_state.interactive = True - create_settings() - if st.session_state.custom_ranges: - file = ( - f"{st.session_state.wd_dirname}/{st.session_state.output}/" - f"{st.session_state.output}_custom_range" - ) - else: - file = f"{wd_dirname}/{st.session_state.output}/{st.session_state.output}" - try: - sequence_info = pd.read_csv(f"{file}_sequence_info.csv") - interactive_setup(sequence_info, file) - except FileNotFoundError: - print(f"{file}_sequence_info.csv not found. 
Please check output folder specification.") - - ##################################################################### # SNP WORKFLOW # ##################################################################### @@ -799,16 +340,12 @@ def show_SNP_page(): if input_option == "Folder with Multiple Files": clicked = st.button("Please Select a Folder") if clicked: - dirname = folder_picker_dialog() - # st.text_input('You Selected The Following folder:', dirname) - st.session_state.samp_input = dirname + st.session_state.samp_input = select.folder() else: clicked_file = st.button("Please Select a File") if clicked_file: - filename = file_picker_dialog() - # st.text_input('You Selected The Following file:', filename) - st.session_state.samp_input = filename + st.session_state.samp_input = select.file() # Display The Selected Path if st.session_state.samp_input: @@ -922,8 +459,7 @@ def show_SNP_page(): clicked_wd = col1.button("Please Select An Output Folder") if clicked_wd: - wd = folder_picker_dialog() - st.session_state.wd_dirname = wd + st.session_state.wd_dirname = select.folder() # Display selected path if st.session_state.wd_dirname: @@ -1002,94 +538,286 @@ def show_SNP_page(): ) -##################################################################### -# How To Use Page # -##################################################################### - - -def show_how_to_use_page(): - - st.title("Common Errors and Best Practices for Using lusSTR") - - st.header("1. File/Folder Path Formatting") - - st.write( - "Please ensure that the displayed path accurately reflects your selection. When using the" - " file or folder picker, navigate to the desired location and click 'OK' to confirm your " - "selection." - ) - - st.header("2. Specifying Output Prefix") - - st.write( - "The purpose of specifying the output prefix is for lusSTR to create result files and " - "folders with that prefix in your working directory. 
Please ensure that you are following" - " proper file naming formatting and rules when specifying this prefix. Avoid using " - "characters such as '/', '', '.', and others. Note: To avoid potential errors, you can " - "simply use the default placeholder for output." - ) - - st.code("Incorrect: 'working_directory/subfolder/subfolder'\nCorrect: output") - - st.write( - "Note that some result files may be saved directly in the working directory with the " - "specified prefix, while others will be populated in a folder labeled with the prefix " - "in your working directory." - ) - st.write("Be aware of this behavior when checking for output files.") - - st.header("3. Specifying Output Folder") - st.write( - "Please Ensure That You Properly Specify an Output Folder. This is where all lusSTR " - "output files will be saved. To avoid potential errors, specifying a working directory " - "is required." - ) - - st.title("About lusSTR") - - st.markdown( - """ - - **_lusSTR Accommodates Four Different Input Formats:_** - - (1) UAS Sample Details Report, UAS Sample Report, and UAS Phenotype Report (for SNP " - "processing) in .xlsx format (a single file or directory containing multiple files) - - (2) STRait Razor v3 output with one sample per file (a single file or directory containing" - " multiple files) - - (3) GeneMarker v2.6 output (a single file or directory containing multiple files) +class Application: + def __init__(self): + st.set_page_config(layout="wide", initial_sidebar_state="collapsed") + self.selected = option_menu( + menu_title=None, + options=["Home", "STRs", "SNPs", "How to Use", "Contact"], + icons=["house", "gear", "gear-fill", "book", "envelope"], + menu_icon="cast", + default_index=0, + orientation="horizontal", + ) + self.samp_input = None + self.wd_dirname = None + + def display(self): + if self.selected == "Home": + docs.home_page() + elif self.selected == "STRs": + self.str_page() + elif self.selected == "SNPs": + show_SNP_page() + elif self.selected 
== "How to Use": + docs.how_to_use_page() + elif self.selected == "Contact": + docs.contact_page() + + def str_page(self): + st.title("STR Workflow") + st.info( + "Please Select STR Settings Below for lusSTR. For information regarding the settings, see the How to Use tab." + ) + self.str_io() + self.str_settings() - (4) Sample(s) sequences in CSV format; first four columns must be Locus, NumReads, Sequence, " - "SampleID; Optional last two columns can be Project and Analysis IDs. + self.str_footer() + ##################################################################### + # STR: Generate Config File Based on Settings # + ##################################################################### - """, - unsafe_allow_html=True, - ) + # Submit Button Instance + def str_io(self): + self.str_input() + self.str_output() -##################################################################### -# Contact Page # -##################################################################### + def str_input(self): + st.subheader("Specify Workflow Inputs") + st.info( + "Indicate whether you are providing an individual file or a folder containing multiple files" + ) + if "samp_input" not in st.session_state: + st.session_state.samp_input = None + input_option = st.radio( + "Select Input Option:", ("Individual file", "Folder with multiple files") + ) + if input_option == "Folder with multiple files": + clicked = st.button("Select a folder") + if clicked: + st.session_state.samp_input = select.folder() + else: + clicked_file = st.button("Select a file") + if clicked_file: + st.session_state.samp_input = select.file() + if st.session_state.samp_input: + st.text_input("Location of your input file(s):", st.session_state.samp_input) + self.samp_input = st.session_state.samp_input + + def str_output(self): + st.subheader("Specify Working Directory for Workflow Outputs") + columns = st.columns(5) + if "wd_dirname" not in st.session_state: + st.session_state.wd_dirname = None + clicked_wd =
columns[0].button("Select Output Folder") + if clicked_wd: + st.session_state.wd_dirname = select.folder() + if st.session_state.wd_dirname: + st.text_input("Working directory:", st.session_state.wd_dirname) + self.wd_dirname = st.session_state.wd_dirname + + def str_settings(self): + self.str_general_settings() + self.str_filter_settings() + + def str_general_settings(self): + st.subheader("General Settings") + columns = st.columns(5) + if "analysis_software" not in st.session_state: + st.session_state.analysis_software = None + selected_software = columns[0].selectbox( + "Analysis Software", + options=["UAS", "STRait Razor v3", "GeneMarker HTS"], + help="Indicate the analysis software used prior to lusSTR.", + ) + software = { + "UAS": "uas", + "STRait Razor v3": "straitrazor", + "GeneMarker HTS": "genemarker", + } + st.session_state.analysis_software = software[selected_software] + if "custom_ranges" not in st.session_state: + st.session_state.custom_ranges = None + st.session_state.custom_ranges = st.checkbox( + "Use Custom Sequence Ranges", + help="Check the box to use the specified custom sequence ranges as defined in the `str_markers.json` file.", + ) + if "sex" not in st.session_state: + st.session_state.sex = None + st.session_state.sex = st.checkbox( + "Include X- and Y-STRs", + help="Check the box to include X- and Y-STRs, otherwise leave unchecked.", + ) + if "kit" not in st.session_state: + st.session_state.kit = None + selected_kit = columns[1].selectbox( + "Library Preparation Kit", + options=["ForenSeq Signature Prep", "PowerSeq 46GY"], + help="Specify the library preparation kit used to generate the sequences.", + ) + kits = {"ForenSeq Signature Prep": "forenseq", "PowerSeq 46GY": "powerseq"} + st.session_state.kit = kits[selected_kit] + if "output" not in st.session_state: + st.session_state.output = None + st.session_state.output = columns[2].text_input( + "Output File Name", + "lusstr_output", + help="Please specify a name for the created files. 
It can only contain alphanumeric characters, underscores and hyphens. No spaces allowed.", + ) + if "nocombine" not in st.session_state: + st.session_state.nocombine = None + st.session_state.nocombine = st.checkbox( + "Do Not Combine Identical Sequences", + help="If using STRait Razor data, by default, identical sequences (after removing flanking sequences) are combined and reads are summed. Checking this will not combine identical sequences.", + ) + def str_filter_settings(self): + st.subheader("Filter Settings") + columns = st.columns(5) + if "output_type" not in st.session_state: + st.session_state.output_type = None + st.session_state.output_type = { + "STRmix": "strmix", + "EuroForMix": "efm", + "MPSproto": "mpsproto", + }[ + columns[0].selectbox( + "Probabilistic Genotyping Software", + options=["STRmix", "EuroForMix", "MPSproto"], + help="Select which probabilistic genotyping software files to create", + ) + ] + if "profile_type" not in st.session_state: + st.session_state.profile_type = None + st.session_state.profile_type = {"Evidence": "evidence", "Reference": "reference"}[ + columns[1].selectbox( + "Profile Type", + options=["Evidence", "Reference"], + help="Select the file type (format) to create for the probabilistic genotyping software.", + ) + ] + if "data_type" not in st.session_state: + st.session_state.data_type = None + st.session_state.data_type = { + "Sequence": "ngs", + "CE allele": "ce", + "LUS+ allele": "lusplus", + }[ + columns[2].selectbox( + "Data Type", + options=["Sequence", "CE allele", "LUS+ allele"], + help="Select the allele type used to determine sequence type (belowAT, stutter or typed) and used in the final output file.", + ) + ] + if "info" not in st.session_state: + st.session_state.info = None + st.session_state.info = st.checkbox( + "Create Allele Information File", + value=True, + help="Create file containing information about each sequence, including sequence type (belowAT, stutter or typed), stuttering sequence 
information and metrics involving stutter and noise.", + ) + if "separate" not in st.session_state: + st.session_state.separate = None + st.session_state.separate = st.checkbox( + "Create Separate Files for Samples", + help="If checked, will create individual files for samples; If unchecked, will create one file with all samples.", + ) + if "nofilters" not in st.session_state: + st.session_state.nofilters = None + st.session_state.nofilters = st.checkbox( + "Skip All Filtering Steps", + help="Filtering will not be performed but will still create EFM/MPSproto/STRmix output files containing all sequences.", + ) + if "strand" not in st.session_state: + st.session_state.strand = None + st.session_state.strand = {"UAS Orientation": "uas", "Forward Strand": "forward"}[ + columns[3].selectbox( + "Strand Orientation", + options=["Forward Strand", "UAS Orientation"], + help="Indicates the strand orientation in which to report the sequence in the final output table as some markers are reported in the UAS on the reverse strand. Selecting the UAS Orientation will report those markers on the reverse strand while the remaining will be reported on the forward strand. Selecting the Forward Strand will report all markers on the forward strand orientation. This applies to STRmix NGS only.", + ) + ] -def show_contact_page(): - st.title("Contact Us") - st.write( - "For any questions or issues, please contact rebecca.mitchell@st.dhs.gov, " - "daniel.standage@st.dhs.gov, or s.h.syed@email.msmary.edu" - ) + def str_launch_workflow(self): + if st.button("Run lusSTR"): + if ( + st.session_state.analysis_software + and st.session_state.samp_input + and st.session_state.output + and st.session_state.wd_dirname + ): + if not validate_prefix(st.session_state.output): + st.warning( + "Please enter a valid output prefix. Only alphanumeric characters, underscore, and hyphen are allowed." 
+ ) + st.stop() + with st.spinner("Processing your data..."): + config_data = { + "analysis_software": st.session_state.analysis_software, + "custom_ranges": st.session_state.custom_ranges, + "sex": st.session_state.sex, + "samp_input": st.session_state.samp_input, + "output": st.session_state.output, + "kit": st.session_state.kit, + "nocombine": st.session_state.nocombine, + "output_type": st.session_state.output_type, + "profile_type": st.session_state.profile_type, + "data_type": st.session_state.data_type, + "info": st.session_state.info, + "separate": st.session_state.separate, + "nofilters": st.session_state.nofilters, + "strand": st.session_state.strand, + } + generate_config_file(config_data, st.session_state.wd_dirname, "STR") + command = ["lusstr", "strs", "all"] + if self.wd_dirname: + command.extend(["-w", st.session_state.wd_dirname + "/"]) + try: + subprocess.run(command, check=True) + st.success( + "Config file generated and lusSTR executed successfully! Output files have been saved to your designated directory and labeled with your specified prefix." + ) + except subprocess.CalledProcessError as e: + st.error(f"Error: {e}") + st.info( + "Please make sure to check the 'How to Use' tab for common error resolutions." + ) + else: + st.warning( + "Please make sure to fill out all required fields (Analysis Software, Input Directory or File, Prefix for Output, and Specification of Working Directory) before submitting." + ) + def str_footer(self): + st.write("---") + st.write( + "After running lusSTR, or if lusSTR has been run previously, the user may view and edit the individual STR marker plots and data." + ) + st.write( + "If lusSTR has been previously run, only the above ```Output Folder``` containing the run files needs to be specified. Other settings will be automatically loaded from the config.yaml file within the specified folder." 
+ ) + if "interactive" not in st.session_state: + st.session_state.interactive = None + if st.button("See Individual Marker Plots & Data") or st.session_state.interactive: + st.session_state.interactive = True + create_settings() + if st.session_state.custom_ranges: + file = f"{st.session_state.wd_dirname}/{st.session_state.output}/{st.session_state.output}_custom_range" + else: + file = f"{self.wd_dirname}/{st.session_state.output}/{st.session_state.output}" + try: + sequence_info = pd.read_csv(f"{file}_sequence_info.csv") + interactive_setup(sequence_info, file) + except FileNotFoundError: + print( + f"{file}_sequence_info.csv not found. Please check output folder specification." + ) -##################################################################### -# lusSTR RUN # -##################################################################### if __name__ == "__main__": main() def subparser(subparsers): - parser = subparsers.add_parser("gui", help="Launch the Streamlit GUI") - parser.set_defaults(func=main) + subparsers.add_parser("gui", description="Launch the lusSTR GUI") diff --git a/lusSTR/cli/snps.py b/lusSTR/cli/snps.py index edb3a2a..9250be4 100644 --- a/lusSTR/cli/snps.py +++ b/lusSTR/cli/snps.py @@ -10,7 +10,6 @@ # Development Center. # ------------------------------------------------------------------------------------------------- -import argparse import lusSTR from snakemake import snakemake diff --git a/lusSTR/cli/strs.py b/lusSTR/cli/strs.py index d5fbaa6..9511a50 100644 --- a/lusSTR/cli/strs.py +++ b/lusSTR/cli/strs.py @@ -10,7 +10,6 @@ # Development Center. 
# ------------------------------------------------------------------------------------------------- -import argparse import lusSTR from snakemake import snakemake diff --git a/lusSTR/gui/__init__.py b/lusSTR/gui/__init__.py new file mode 100644 index 0000000..b41c5ad --- /dev/null +++ b/lusSTR/gui/__init__.py @@ -0,0 +1,11 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2024, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- diff --git a/lusSTR/gui/app.py b/lusSTR/gui/app.py new file mode 100644 index 0000000..b41c5ad --- /dev/null +++ b/lusSTR/gui/app.py @@ -0,0 +1,11 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2024, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. 
+# ------------------------------------------------------------------------------------------------- diff --git a/lusSTR/gui/docs.py b/lusSTR/gui/docs.py new file mode 100644 index 0000000..0e956cb --- /dev/null +++ b/lusSTR/gui/docs.py @@ -0,0 +1,94 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2024, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from importlib.resources import files +import streamlit as st + + +def home_page(): + image_path = files("lusSTR") / "cli" / "logo.png" + left_column, center_column, right_column = st.columns([2.5, 8, 2.5]) + with center_column: + st.image(str(image_path), use_column_width="auto") + + # CSS to hide full-screen button + hide_img_fs = """ + + """ + st.markdown(hide_img_fs, unsafe_allow_html=True) + + st.markdown( + """ + lusSTR is an end-to-end workflow for processing human forensic data (STRs and SNPs) + derived from Next Generation Sequencing (NGS) data for use in probabilistic genotyping + software. For more information on lusSTR, visit our + [GitHub page](https://github.com/bioforensics/lusSTR). + """, + unsafe_allow_html=True, + ) + st.info("Please Select One of the Tabs Above to Get Started on Processing Your Data!") + + +def how_to_use_page(): + st.title("Common Errors and Best Practices for Using lusSTR") + st.header("1. 
File/Folder Path Formatting") + st.write( + "Please ensure that the displayed path accurately reflects your selection. When using " + "the file or folder picker, navigate to the desired location and click 'OK' to " + "confirm your selection." + ) + st.header("2. Specifying Output Prefix") + st.write( + "The purpose of specifying the output prefix is for lusSTR to create result files and " + "folders with that prefix in your working directory. Please ensure that you are " + "following proper file naming formatting and rules when specifying this prefix. Avoid " + "using characters such as '/', '', '.', and others. Note: To avoid potential errors, " + "you can simply use the default placeholder for output." + ) + st.code("Incorrect: 'working_directory/subfolder/subfolder'\nCorrect: output") + st.write( + "Note that some result files may be saved directly in the working directory with the " + "specified prefix, while others will be populated in a folder labeled with the prefix " + "in your working directory." + ) + st.write("Be aware of this behavior when checking for output files.") + st.header("3. Specifying Output Folder") + st.write( + "Please Ensure That You Properly Specify an Output Folder. This is where all lusSTR " + "output files will be saved. To avoid potential errors, specifying a working " + "directory is required." 
+ ) + st.title("About lusSTR") + st.markdown(""" + **_lusSTR Accommodates Four Different Input Formats:_** + + (1) UAS Sample Details Report, UAS Sample Report, and UAS Phenotype Report (for SNP " + "processing) in .xlsx format (a single file or directory containing multiple files) + + (2) STRait Razor v3 output with one sample per file (a single file or directory containing" + " multiple files) + + (3) GeneMarker v2.6 output (a single file or directory containing multiple files) + + (4) Sample(s) sequences in CSV format; first four columns must be Locus, NumReads, Sequence, " + "SampleID; Optional last two columns can be Project and Analysis IDs. + """, + unsafe_allow_html=True, + ) + + +def contact_page(): + st.title("Contact Us") + st.write("For any questions or issues, please contact rebecca.mitchell@st.dhs.gov or daniel.standage@st.dhs.gov.") diff --git a/lusSTR/gui/select.py b/lusSTR/gui/select.py new file mode 100644 index 0000000..e394b55 --- /dev/null +++ b/lusSTR/gui/select.py @@ -0,0 +1,44 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2024, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. 
+# ------------------------------------------------------------------------------------------------- + +from importlib.resources import files +import json +import streamlit as st +from subprocess import run + + +def folder(): + script = files("lusSTR") / "scripts" / "folder_selector.py" + result = run(["python", script], capture_output=True, text=True) + if result.returncode == 0: + folder_data = json.loads(result.stdout) + folder_path = folder_data.get("folder_path") + if folder_path: + return folder_path + else: + st.error("No folder selected") + else: + st.error("Error selecting folder") + + +def file(): + script = files("lusSTR") / "scripts" / "file_selector.py" + result = run(["python", script], capture_output=True, text=True) + if result.returncode == 0: + file_data = json.loads(result.stdout) + file_path = file_data.get("file_path") + if file_path: + return file_path + else: + st.error("No folder selected") + else: + st.error("Error selecting folder") diff --git a/lusSTR/wrappers/snps_convert.py b/lusSTR/wrappers/snps_convert.py index ad73276..dbbf4f5 100644 --- a/lusSTR/wrappers/snps_convert.py +++ b/lusSTR/wrappers/snps_convert.py @@ -66,13 +66,9 @@ def bin_snps(sample_file, output_type, sample): start = snp_num * 1000 if snp_num != 9: end = start + 1000 - bin_df = sorted_file.iloc[ - start:end, - ].reset_index(drop=True) + bin_df = sorted_file.iloc[start:end,].reset_index(drop=True) else: - bin_df = sorted_file.iloc[ - start : len(sorted_file), - ].reset_index(drop=True) + bin_df = sorted_file.iloc[start : len(sorted_file),].reset_index(drop=True) bin_df["Sample.Name"] = bin_df["Sample.Name"] + "_set" + str((snp_num + 1)) compiled_table = pd.concat([compiled_table, bin_df]) bin_df.to_csv( From e2e69e855e9726b5c13a1dab3681aca742f127e7 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Fri, 18 Oct 2024 15:44:19 -0400 Subject: [PATCH 2/3] More code reorganization --- lusSTR/cli/gui.py | 805 +----------------------------- lusSTR/gui/__init__.py | 30 ++ 
lusSTR/gui/{app.py => contact.py} | 8 + lusSTR/gui/docs.py | 94 ---- lusSTR/gui/home.py | 42 ++ lusSTR/gui/howto.py | 62 +++ lusSTR/gui/snps.py | 251 ++++++++++ lusSTR/gui/strs.py | 498 ++++++++++++++++++ lusSTR/gui/util.py | 31 ++ 9 files changed, 928 insertions(+), 893 deletions(-) rename lusSTR/gui/{app.py => contact.py} (75%) delete mode 100644 lusSTR/gui/docs.py create mode 100644 lusSTR/gui/home.py create mode 100644 lusSTR/gui/howto.py create mode 100644 lusSTR/gui/snps.py create mode 100644 lusSTR/gui/strs.py create mode 100644 lusSTR/gui/util.py diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py index 4ae34a9..f188a1f 100644 --- a/lusSTR/cli/gui.py +++ b/lusSTR/cli/gui.py @@ -10,809 +10,16 @@ # Development Center. # ------------------------------------------------------------------------------------------------- -from datetime import datetime -import json -import importlib.resources -from lusSTR.gui import docs, select -from lusSTR.wrappers.filter import get_at, EFM_output, marker_plots, STRmix_output -import math -import numpy as np -import pandas as pd -from pathlib import Path -import plotly.express as px -import streamlit as st -from streamlit_option_menu import option_menu -import yaml -import subprocess -import os -import re - - -def get_filter_metadata_file(): - return importlib.resources.files("lusSTR") / "data/filters.json" - - -with open(get_filter_metadata_file(), "r") as fh: - filter_marker_data = json.load(fh) - - -# ------------ Function to Generate config.yaml File ---------- # - - -def generate_config_file(config_data, working_directory, workflow_type): - if workflow_type == "STR": - config_filename = "config.yaml" - elif workflow_type == "SNP": - config_filename = "snp_config.yaml" - else: - raise ValueError("Invalid workflow type. 
Please specify either 'STR' or 'SNP'.") - - config_path = os.path.join(working_directory, config_filename) - with open(config_path, "w") as file: - yaml.dump(config_data, file) - - -# ---- Function to validate prefix for output folder ---------- # - - -def validate_prefix(prefix): - if re.match( - r"^[A-Za-z0-9_-]+$", prefix - ): # Allow alphanumeric characters, underscore, and hyphen - return True - else: - return False +from lusSTR.gui import initialize +from lusSTR.gui.snps import show_SNP_page def main(): - app = Application() - app.display() - - -def df_on_change(locus): - state = st.session_state[f"{locus}_edited"] - for index, updates in state["edited_rows"].items(): - st.session_state[locus].loc[st.session_state[locus].index == index, "edited"] = True - for key, value in updates.items(): - st.session_state[locus].loc[st.session_state[locus].index == index, key] = value - - -def interactive_plots_allmarkers(sample_df, flagged_df): - cols = st.columns(4) - max_reads = max(sample_df["Reads"]) - n = 100 if max_reads > 1000 else 10 - max_yvalue = int(math.ceil(max_reads / n)) * n - increase_value = int(math.ceil((max_yvalue / 5)) / n) * n - n = 0 - for marker in sample_df["Locus"].unique(): - col = cols[n] - container = col.container(border=True) - sample_locus = sample_df["SampleID"].unique() + "_" + marker - marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele") - if sample_locus in flagged_df["key"].values: - marker = f"⚠️{marker}⚠️" - plot = interactive_plots(marker_df, marker, max_yvalue, increase_value, all=True) - container.plotly_chart(plot, use_container_width=True) - if n == 3: - n = 0 - else: - n += 1 - - -def interactive_plots(df, locus, ymax, increase, all=False): - if "⚠️" in locus: - locus_at = locus.replace("⚠️", "") - else: - locus_at = locus - at = get_at(df, locus_at) - for i, row in df.iterrows(): - if "stutter" in df.loc[i, "allele_type"]: - df.loc[i, "Label"] = "Stutter" - else: - df.loc[i, "Label"] = df.loc[i, 
"allele_type"] - min_x = round(min(df["CE_Allele"]) - 1) - max_x = round(max(df["CE_Allele"]) + 1) - plot = px.bar( - df, - x="CE_Allele", - y="Reads", - color="Label", - color_discrete_map={ - "Typed": "green", - "BelowAT": "red", - "Stutter": "blue", - "Deleted": "purple", - }, - title=locus, - ) - plot.add_hline(y=at, line_width=3, line_dash="dot", line_color="gray") - plot.add_annotation(text=f"AT", x=min_x + 0.1, y=at, showarrow=False, yshift=10) - plot.update_layout( - xaxis=dict(range=[min_x, max_x], tickmode="array", tickvals=np.arange(min_x, max_x, 1)) - ) - if all: - plot.update_layout( - yaxis=dict(range=[0, ymax], tickmode="array", tickvals=np.arange(0, ymax, increase)) - ) - return plot - - -def remake_final_files(full_df, outpath): - if st.session_state.custom_ranges: - seq_col = "Custom_Range_Sequence" - brack_col = "Custom_Bracketed_Notation" - else: - seq_col = ( - "UAS_Output_Sequence" - if st.session_state.strand == "uas" - else "Forward_Strand_Sequence" - ) - brack_col = ( - "UAS_Output_Bracketed_Notation" - if st.session_state.strand == "uas" - else "Forward_Strand_Bracketed_Notation" - ) - if st.session_state.nofilters: - full_df["allele_type"] = "Typed" - if st.session_state.output_type == "efm" or st.session_state.output_type == "mpsproto": - EFM_output( - full_df, - outpath, - st.session_state.profile_type, - st.session_state.data_type, - brack_col, - st.session_state.sex, - st.session_state.separate, - ) + app = initialize() + if app is None: + show_SNP_page() else: - STRmix_output( - full_df, outpath, st.session_state.profile_type, st.session_state.data_type, seq_col - ) - - -def interactive_setup(df1, file): - col1, col2, col3, col4, col5 = st.columns(5) - sample = col1.selectbox("Select Sample:", options=df1["SampleID"].unique()) - sample_df = df1[df1["SampleID"] == sample].reset_index(drop=True) - locus_list = pd.concat([pd.Series("All Markers"), sample_df["Locus"].drop_duplicates()]) - if os.path.isfile(f"{file}_Flagged_Loci.csv"): - 
flags = pd.read_csv(f"{file}_Flagged_Loci.csv") - else: - flags = pd.DataFrame(columns=["key", "SampleID", "Locus"]) - flags["key"] = flags["SampleID"] + "_" + flags["Locus"] - flags_sample = flags[flags["SampleID"] == sample].reset_index(drop=True) - for flagged_locus in flags_sample["Locus"].unique(): - locus_list = locus_list.str.replace(flagged_locus, f"⚠️{flagged_locus}⚠️") - locus = col2.selectbox("Select Marker:", options=locus_list) - if "⚠️" in locus: - locus = locus.replace("⚠️", "") - if locus == "All Markers": - if not flags_sample.empty: - st.write( - f"⚠️ indicates potential problems with the marker. Examine the individual marker " - f"plots for more information." - ) - interactive_plots_allmarkers(sample_df, flags) - else: - locus_key = f"{sample}_{locus}" - if locus_key not in st.session_state: - st.session_state[locus_key] = sample_df[sample_df["Locus"] == locus].reset_index( - drop=True - ) - Type = [ - "Deleted", - "Typed", - "-1_stutter", - "-2_stutter", - "BelowAT", - "-1_stutter/+1_stutter", - "+1_stutter", - ] - plot = interactive_plots(st.session_state[locus_key], locus, None, None) - st.plotly_chart(plot, use_container_width=True) - col1, col2, col3 = st.columns(3) - if locus_key in flags["key"].values: - locus_flags = flags[flags["key"] == locus_key] - for flag in locus_flags["Flags"].unique(): - col2.write(f"⚠️ Potential issue: {flag} identified!") - st.data_editor( - data=st.session_state[locus_key], - disabled=( - "SampleID", - "Locus", - "UAS_Output_Sequence", - "CE_Allele", - "UAS_Output_Bracketed_Notation", - "Custom_Range_Sequence", - "Custom_Bracketed_Notation", - "Reads", - "parent_allele1", - "parent_allele2", - "allele1_ref_reads", - "allele2_ref_reads", - "perc_noise", - "perc_stutter", - ), - column_config={ - "allele_type": st.column_config.SelectboxColumn("allele_type", options=Type) - }, - hide_index=True, - key=f"{locus_key}_edited", - on_change=df_on_change, - args=(locus_key,), - ) - if st.button("Save Edits"): - ph = 
st.empty() - with ph.container(): - st.write("Saving Changes - May take a minute or two.") - combined_df = pd.DataFrame() - for sample in df1["SampleID"].unique(): - sample_df = df1[df1["SampleID"] == sample].reset_index(drop=True) - for locus in sample_df["Locus"].unique(): - locus_key = f"{sample}_{locus}" - try: - combined_df = pd.concat([combined_df, st.session_state[locus_key]]) - except KeyError: - combined_df = pd.concat( - [ - combined_df, - sample_df[sample_df["Locus"] == locus].reset_index(drop=True), - ] - ) - now = datetime.now() - dt = now.strftime("%m%d%Y_%H_%M_%S") - del combined_df["Label"] - Path(f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}").mkdir( - parents=True, exist_ok=True - ) - outpath = f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}/" - marker_plots(combined_df, f"{st.session_state.output}_edited_{dt}", sex=False, wd=outpath) - combined_df.to_csv( - f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}/" - f"{st.session_state.output}_sequence_info_edited_{dt}.csv", - index=False, - ) - new_text = ( - f"Changes saved to {st.session_state.wd_dirname}/{st.session_state.output}" - f"/edited_{dt}/{st.session_state.output}_sequence_info_edited_{dt}.csv" - f"New {st.session_state.output_type} files created in {st.session_state.wd_dirname}" - f"/{st.session_state.output}/edited_{dt}/ folder" - ) - remake_final_files(combined_df, outpath) - ph.empty() - with ph.container(): - st.write( - f"New files and marker plots with edits saved to {st.session_state.wd_dirname}/" - f"{st.session_state.output}/edited_{dt}/" - ) - - -def create_settings(): - if os.path.isfile(f"{st.session_state.wd_dirname}/config.yaml"): - st.write(f"Loading settings from {st.session_state.wd_dirname}/config.yaml") - with open(f"{st.session_state.wd_dirname}/config.yaml", "r") as file: - config_settings = yaml.safe_load(file) - st.session_state.output = config_settings["output"] - st.session_state.custom_ranges = 
config_settings["custom_ranges"] - st.session_state.profile_type = config_settings["profile_type"] - st.session_state.data_type = config_settings["data_type"] - st.session_state.sex = config_settings["sex"] - st.session_state.separate = config_settings["separate"] - st.session_state.strand = config_settings["strand"] - st.session_state.output_type = config_settings["output_type"] - - -##################################################################### -# SNP WORKFLOW # -##################################################################### - -##################################################################### -# Specify SNP Settings Which Will Be Used to Generate Config File # -##################################################################### - - -def show_SNP_page(): - - st.title("SNP Workflow") - st.info( - "Please Select SNP Settings Below for lusSTR! For Information Regarding the Settings," - " See the How to Use Tab." - ) - - # Input File Specification - st.subheader("Input Files Selection") - - # Ask user if submitting a directory or individual file - st.info( - "Please Indicate If You Are Providing An Individual Input File or a Folder Containing " - "Multiple Input Files" - ) - input_option = st.radio( - "Select Input Option:", ("Individual File", "Folder with Multiple Files") - ) - - # Initialize session state if not already initialized - if "samp_input" not in st.session_state: - st.session_state.samp_input = None - - # Logic for Path Picker based on user's input option - - if input_option == "Folder with Multiple Files": - clicked = st.button("Please Select a Folder") - if clicked: - st.session_state.samp_input = select.folder() - - else: - clicked_file = st.button("Please Select a File") - if clicked_file: - st.session_state.samp_input = select.file() - - # Display The Selected Path - if st.session_state.samp_input: - st.text_input("Location Of Your Input File(s):", st.session_state.samp_input) - - # Store Selected Path to Reference in Config - 
samp_input = st.session_state.samp_input - - ##################################################################### - # SNP: General Software Settings to Generate Config File # - ##################################################################### - - st.subheader("General Settings") - - col1, col2, col3, col4, col5 = st.columns(5) - - analysis_software = {"UAS": "uas", "STRait Razor v3": "straitrazor"}[ - col1.selectbox( - "Analysis Software", - options=["UAS", "STRait Razor v3"], - help="Indicate the analysis software used prior to lusSTR sex.", - ) - ] - - output = col2.text_input( - "Output File Name", "lusstr_output", help="Please specify a name for the created files." - ) - - kit = {"Signature Prep": "sigprep", "Kintelligence": "kintelligence"}[ - col3.selectbox("Library Preparation Kit", options=["Signature Prep", "Kintelligence"]) - ] - - ##################################################################### - # SNP: Format Settings to Generate Config File # - ##################################################################### - - st.subheader("Convert Settings") - - col1, col2, col3, col4, col5 = st.columns(5) - - # -- Select Type (Unique to SNP Workflow) - types_mapping = { - "Identify SNPs": "i", - "Phenotype SNPs": "p", - "Ancestry SNPs": "a", - "All SNPs": "all", - } - selected_types = col1.multiselect( - "Select SNP Types:", - options=types_mapping.keys(), - help="Select the SNP types to process; can select one or more options", - ) - types_string = ( - "all" - if "All" in selected_types - else ", ".join(types_mapping.get(t, t) for t in selected_types) - ) - - # -- Filter - nofilters = st.checkbox( - "Skip all filtering steps", - help="Specify for no filtering", - ) - - ##################################################################### - # SNP: Convert Settings to Generate Config File # - ##################################################################### - - separate = st.checkbox( - "Create Separate Files for Samples", - help="If want to 
separate samples into individual files for use in EFM", - ) - - strand = {"UAS Orientation": "uas", "Forward Strand": "forward"}[ - col2.selectbox( - "Strand Orientation", - options=["UAS Orientation", "Forward Strand"], - help="Indicate which orientation to report the alleles for the SigPrep SNPs.", - ) - ] - - # Analytical threshold value - thresh = col3.number_input("Analytical threshold value:", value=0.03, step=0.01, min_value=0.0) - - ##################################################################### - # SNP: Specify a Reference File if User Has One # - ##################################################################### - - col1, col2, col3 = st.columns(3) - - if "reference" not in st.session_state: - st.session_state.reference = None - - reference = col1.text_input( - "Please Specify Your Reference Sample IDs", - help="List IDs of the samples to be run as references in EFM; default is no " - "reference samples", - ) - - ##################################################################### - # SNP: Specify Working Directory # - ##################################################################### - - st.subheader("Set Output Folder") - - col1, col2, col3, col4, col5 = st.columns(5) - - # Initialize session state if not already initialized - if "wd_dirname" not in st.session_state: - st.session_state.wd_dirname = None - - clicked_wd = col1.button("Please Select An Output Folder") - if clicked_wd: - st.session_state.wd_dirname = select.folder() - - # Display selected path - if st.session_state.wd_dirname: - st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname) - - ##################################################################### - # SNP: Generate Config File Based on Settings # - ##################################################################### - - # Submit Button Instance - if st.button("Submit"): - - # Check if all required fields are filled - if analysis_software and samp_input and output and wd_dirname: - - # Validate 
output prefix - if not validate_prefix(output): - st.warning( - "Please enter a valid output prefix. Only alphanumeric characters, " - "underscore, and hyphen are allowed." - ) - st.stop() # Stop execution if prefix is invalid - - # Display loading spinner (Continuing Process Checks Above Were Passed) - with st.spinner("Processing Your Data..."): - - # Construct config data - - config_data = { - "analysis_software": analysis_software, - "samp_input": samp_input, - "output": output, - "kit": kit, - "types": types_string, - "thresh": thresh, - "separate": separate, - "nofilter": nofilters, - "strand": strand, - "references": None, # Default value is None - } - - # If a reference file was specified, add to config - if reference: - config_data["references"] = st.session_state.reference - - # Generate YAML config file - generate_config_file(config_data, st.session_state.wd_dirname, "SNP") - - # Subprocess lusSTR commands - command = ["lusstr", "snps", "all"] - - # Specify WD to lusSTR - if wd_dirname: - command.extend(["-w", st.session_state.wd_dirname + "/"]) - - # Run lusSTR command in terminal - try: - subprocess.run(command, check=True) - st.success( - "Config File Generated and lusSTR Executed Successfully! Output Files " - "Have Been Saved to Your Designated Directory and Labeled with your " - "Specified Prefix" - ) - except subprocess.CalledProcessError as e: - st.error(f"Error: {e}") - st.info( - "Please make sure to check the 'How to Use' tab for common error " - "resolutions." - ) - - else: - st.warning( - "Please make sure to fill out all required fields (Analysis Software, Input " - "Directory or File, Prefix for Output, and Specification of Working Directory) " - "before submitting." 
- ) - - -class Application: - def __init__(self): - st.set_page_config(layout="wide", initial_sidebar_state="collapsed") - self.selected = option_menu( - menu_title=None, - options=["Home", "STRs", "SNPs", "How to Use", "Contact"], - icons=["house", "gear", "gear-fill", "book", "envelope"], - menu_icon="cast", - default_index=0, - orientation="horizontal", - ) - self.samp_input = None - self.wd_dirname = None - - def display(self): - if self.selected == "Home": - docs.home_page() - elif self.selected == "STRs": - self.str_page() - elif self.selected == "SNPs": - show_SNP_page() - elif self.selected == "How to Use": - docs.how_to_use_page() - elif self.selected == "Contact": - docs.contact_page() - - def str_page(self): - st.title("STR Workflow") - st.info( - "Please Select STR Settings Below for lusSTR. For information regarding the settings, see the How to Use tab." - ) - self.str_io() - self.str_settings() - - self.str_footer() - - ##################################################################### - # STR: Generate Config File Based on Settings # - ##################################################################### - - # Submit Button Instance - - def str_io(self): - self.str_input() - self.str_output() - - def str_input(self): - st.subheader("Specify Worfklow Inputs") - st.info( - "Indicate whether you are providing an individual file or a folder containing multiple files" - ) - if "samp_input" not in st.session_state: - st.session_state.samp_input = None - input_option = st.radio( - "Select Input Option:", ("Individual file", "Folder with multiple files") - ) - if input_option == "Folder with Multiple Files": - clicked = st.button("Select a folder") - if clicked: - st.session_state.samp_input = select.folder() - else: - clicked_file = st.button("Select a file") - if clicked_file: - st.session_state.samp_input = select.file() - if st.session_state.samp_input: - st.text_input("Location of your input file(s):", st.session_state.samp_input) - self.samp_input = 
st.session_state.samp_input - - def str_output(self): - st.subheader("Specify Working Directory for Workflow Outputs") - columns = st.columns(5) - if "wd_dirname" not in st.session_state: - st.session_state.wd_dirname = None - clicked_wd = columns[0].button("Select Output Folder") - if clicked_wd: - st.session_state.wd_dirname = select.folder() - if st.session_state.wd_dirname: - st.text_input("Working directory:", st.session_state.wd_dirname) - self.wd_dirname = st.session_state.wd_dirname - - def str_settings(self): - self.str_general_settings() - self.str_filter_settings() - - def str_general_settings(self): - st.subheader("General Settings") - columns = st.columns(5) - if "analysis_software" not in st.session_state: - st.session_state.analysis_software = None - selected_software = columns[0].selectbox( - "Analysis Software", - options=["UAS", "STRait Razor v3", "GeneMarker HTS"], - help="Indicate the analysis software used prior to lusSTR.", - ) - software = { - "UAS": "uas", - "STRait Razor v3": "straitrazor", - "GeneMarker HTS": "genemarker", - } - st.session_state.analysis_software = software[selected_software] - if "custom_ranges" not in st.session_state: - st.session_state.custom_ranges = None - st.session_state.custom_ranges = st.checkbox( - "Use Custom Sequence Ranges", - help="Check the box to use the specified custom sequence ranges as defined in the `str_markers.json` file.", - ) - if "sex" not in st.session_state: - st.session_state.sex = None - st.session_state.sex = st.checkbox( - "Include X- and Y-STRs", - help="Check the box to include X- and Y-STRs, otherwise leave unchecked.", - ) - if "kit" not in st.session_state: - st.session_state.kit = None - selected_kit = columns[1].selectbox( - "Library Preparation Kit", - options=["ForenSeq Signature Prep", "PowerSeq 46GY"], - help="Specify the library preparation kit used to generate the sequences.", - ) - kits = {"ForenSeq Signature Prep": "forenseq", "PowerSeq 46GY": "powerseq"} - 
st.session_state.kit = kits[selected_kit] - if "output" not in st.session_state: - st.session_state.output = None - st.session_state.output = columns[2].text_input( - "Output File Name", - "lusstr_output", - help="Please specify a name for the created files. It can only contain alphanumeric characters, underscores and hyphens. No spaces allowed.", - ) - if "nocombine" not in st.session_state: - st.session_state.nocombine = None - st.session_state.nocombine = st.checkbox( - "Do Not Combine Identical Sequences", - help="If using STRait Razor data, by default, identical sequences (after removing flanking sequences) are combined and reads are summed. Checking this will not combine identical sequences.", - ) - - def str_filter_settings(self): - st.subheader("Filter Settings") - columns = st.columns(5) - if "output_type" not in st.session_state: - st.session_state.output_type = None - st.session_state.output_type = { - "STRmix": "strmix", - "EuroForMix": "efm", - "MPSproto": "mpsproto", - }[ - columns[0].selectbox( - "Probabilistic Genotyping Software", - options=["STRmix", "EuroForMix", "MPSproto"], - help="Select which probabilistic genotyping software files to create", - ) - ] - if "profile_type" not in st.session_state: - st.session_state.profile_type = None - st.session_state.profile_type = {"Evidence": "evidence", "Reference": "reference"}[ - columns[1].selectbox( - "Profile Type", - options=["Evidence", "Reference"], - help="Select the file type (format) to create for the probabilistic genotyping software.", - ) - ] - if "data_type" not in st.session_state: - st.session_state.data_type = None - st.session_state.data_type = { - "Sequence": "ngs", - "CE allele": "ce", - "LUS+ allele": "lusplus", - }[ - columns[2].selectbox( - "Data Type", - options=["Sequence", "CE allele", "LUS+ allele"], - help="Select the allele type used to determine sequence type (belowAT, stutter or typed) and used in the final output file.", - ) - ] - if "info" not in st.session_state: - 
st.session_state.info = None - st.session_state.info = st.checkbox( - "Create Allele Information File", - value=True, - help="Create file containing information about each sequence, including sequence type (belowAT, stutter or typed), stuttering sequence information and metrics involving stutter and noise.", - ) - if "separate" not in st.session_state: - st.session_state.separate = None - st.session_state.separate = st.checkbox( - "Create Separate Files for Samples", - help="If checked, will create individual files for samples; If unchecked, will create one file with all samples.", - ) - if "nofilters" not in st.session_state: - st.session_state.nofilters = None - st.session_state.nofilters = st.checkbox( - "Skip All Filtering Steps", - help="Filtering will not be performed but will still create EFM/MPSproto/STRmix output files containing all sequences.", - ) - if "strand" not in st.session_state: - st.session_state.strand = None - st.session_state.strand = {"UAS Orientation": "uas", "Forward Strand": "forward"}[ - columns[3].selectbox( - "Strand Orientation", - options=["Forward Strand", "UAS Orientation"], - help="Indicates the strand orientation in which to report the sequence in the final output table as some markers are reported in the UAS on the reverse strand. Selecting the UAS Orientation will report those markers on the reverse strand while the remaining will be reported on the forward strand. Selecting the Forward Strand will report all markers on the forward strand orientation. This applies to STRmix NGS only.", - ) - ] - - def str_launch_workflow(self): - if st.button("Run lusSTR"): - if ( - st.session_state.analysis_software - and st.session_state.samp_input - and st.session_state.output - and st.session_state.wd_dirname - ): - if not validate_prefix(st.session_state.output): - st.warning( - "Please enter a valid output prefix. Only alphanumeric characters, underscore, and hyphen are allowed." 
- ) - st.stop() - with st.spinner("Processing your data..."): - config_data = { - "analysis_software": st.session_state.analysis_software, - "custom_ranges": st.session_state.custom_ranges, - "sex": st.session_state.sex, - "samp_input": st.session_state.samp_input, - "output": st.session_state.output, - "kit": st.session_state.kit, - "nocombine": st.session_state.nocombine, - "output_type": st.session_state.output_type, - "profile_type": st.session_state.profile_type, - "data_type": st.session_state.data_type, - "info": st.session_state.info, - "separate": st.session_state.separate, - "nofilters": st.session_state.nofilters, - "strand": st.session_state.strand, - } - generate_config_file(config_data, st.session_state.wd_dirname, "STR") - command = ["lusstr", "strs", "all"] - if self.wd_dirname: - command.extend(["-w", st.session_state.wd_dirname + "/"]) - try: - subprocess.run(command, check=True) - st.success( - "Config file generated and lusSTR executed successfully! Output files have been saved to your designated directory and labeled with your specified prefix." - ) - except subprocess.CalledProcessError as e: - st.error(f"Error: {e}") - st.info( - "Please make sure to check the 'How to Use' tab for common error resolutions." - ) - else: - st.warning( - "Please make sure to fill out all required fields (Analysis Software, Input Directory or File, Prefix for Output, and Specification of Working Directory) before submitting." - ) - - def str_footer(self): - st.write("---") - st.write( - "After running lusSTR, or if lusSTR has been run previously, the user may view and edit the individual STR marker plots and data." - ) - st.write( - "If lusSTR has been previously run, only the above ```Output Folder``` containing the run files needs to be specified. Other settings will be automatically loaded from the config.yaml file within the specified folder." 
- ) - if "interactive" not in st.session_state: - st.session_state.interactive = None - if st.button("See Individual Marker Plots & Data") or st.session_state.interactive: - st.session_state.interactive = True - create_settings() - if st.session_state.custom_ranges: - file = f"{st.session_state.wd_dirname}/{st.session_state.output}/{st.session_state.output}_custom_range" - else: - file = f"{self.wd_dirname}/{st.session_state.output}/{st.session_state.output}" - try: - sequence_info = pd.read_csv(f"{file}_sequence_info.csv") - interactive_setup(sequence_info, file) - except FileNotFoundError: - print( - f"{file}_sequence_info.csv not found. Please check output folder specification." - ) + app().display() if __name__ == "__main__": diff --git a/lusSTR/gui/__init__.py b/lusSTR/gui/__init__.py index b41c5ad..3555b68 100644 --- a/lusSTR/gui/__init__.py +++ b/lusSTR/gui/__init__.py @@ -9,3 +9,33 @@ # National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and # Development Center. 
# ------------------------------------------------------------------------------------------------- + +from .home import HomePage +from .howto import HowToPage +from .contact import ContactPage +from .strs import STRWorkflow +import streamlit as st +from streamlit_option_menu import option_menu + + +apps = { + "Home": HomePage, + "STRs": STRWorkflow, + "SNPs": None, + "How to Use": HowToPage, + "Contact": ContactPage, +} + + +def initialize(): + st.set_page_config(layout="wide", initial_sidebar_state="collapsed") + selected = option_menu( + menu_title=None, + options=["Home", "STRs", "SNPs", "How to Use", "Contact"], + icons=["house", "gear", "gear-fill", "book", "envelope"], + menu_icon="cast", + default_index=0, + orientation="horizontal", + ) + appname = str(selected) + return apps[appname] diff --git a/lusSTR/gui/app.py b/lusSTR/gui/contact.py similarity index 75% rename from lusSTR/gui/app.py rename to lusSTR/gui/contact.py index b41c5ad..7581885 100644 --- a/lusSTR/gui/app.py +++ b/lusSTR/gui/contact.py @@ -9,3 +9,11 @@ # National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and # Development Center. # ------------------------------------------------------------------------------------------------- + +import streamlit as st + + +class ContactPage(): + def display(self): + st.title("Contact Us") + st.write("For any questions or issues, please contact rebecca.mitchell@st.dhs.gov or daniel.standage@st.dhs.gov.") diff --git a/lusSTR/gui/docs.py b/lusSTR/gui/docs.py deleted file mode 100644 index 0e956cb..0000000 --- a/lusSTR/gui/docs.py +++ /dev/null @@ -1,94 +0,0 @@ -# ------------------------------------------------------------------------------------------------- -# Copyright (c) 2024, DHS. -# -# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under -# the BSD license: see LICENSE.txt. 
-# -# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National -# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the -# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and -# Development Center. -# ------------------------------------------------------------------------------------------------- - -from importlib.resources import files -import streamlit as st - - -def home_page(): - image_path = files("lusSTR") / "cli" / "logo.png" - left_column, center_column, right_column = st.columns([2.5, 8, 2.5]) - with center_column: - st.image(str(image_path), use_column_width="auto") - - # CSS to hide full-screen button - hide_img_fs = """ - - """ - st.markdown(hide_img_fs, unsafe_allow_html=True) - - st.markdown( - """ - lusSTR is an end-to-end workflow for processing human forensic data (STRs and SNPs) - derived from Next Generation Sequencing (NGS) data for use in probabilistic genotyping - software. For more information on lusSTR, visit our - [GitHub page](https://github.com/bioforensics/lusSTR). - """, - unsafe_allow_html=True, - ) - st.info("Please Select One of the Tabs Above to Get Started on Processing Your Data!") - - -def how_to_use_page(): - st.title("Common Errors and Best Practices for Using lusSTR") - st.header("1. File/Folder Path Formatting") - st.write( - "Please ensure that the displayed path accurately reflects your selection. When using " - "the file or folder picker, navigate to the desired location and click 'OK' to " - "confirm your selection." - ) - st.header("2. Specifying Output Prefix") - st.write( - "The purpose of specifying the output prefix is for lusSTR to create result files and " - "folders with that prefix in your working directory. Please ensure that you are " - "following proper file naming formatting and rules when specifying this prefix. Avoid " - "using characters such as '/', '', '.', and others. 
Note: To avoid potential errors, " - "you can simply use the default placeholder for output." - ) - st.code("Incorrect: 'working_directory/subfolder/subfolder'\nCorrect: output") - st.write( - "Note that some result files may be saved directly in the working directory with the " - "specified prefix, while others will be populated in a folder labeled with the prefix " - "in your working directory." - ) - st.write("Be aware of this behavior when checking for output files.") - st.header("3. Specifying Output Folder") - st.write( - "Please Ensure That You Properly Specify an Output Folder. This is where all lusSTR " - "output files will be saved. To avoid potential errors, specifying a working " - "directory is required." - ) - st.title("About lusSTR") - st.markdown(""" - **_lusSTR Accommodates Four Different Input Formats:_** - - (1) UAS Sample Details Report, UAS Sample Report, and UAS Phenotype Report (for SNP " - "processing) in .xlsx format (a single file or directory containing multiple files) - - (2) STRait Razor v3 output with one sample per file (a single file or directory containing" - " multiple files) - - (3) GeneMarker v2.6 output (a single file or directory containing multiple files) - - (4) Sample(s) sequences in CSV format; first four columns must be Locus, NumReads, Sequence, " - "SampleID; Optional last two columns can be Project and Analysis IDs. - """, - unsafe_allow_html=True, - ) - - -def contact_page(): - st.title("Contact Us") - st.write("For any questions or issues, please contact rebecca.mitchell@st.dhs.gov or daniel.standage@st.dhs.gov.") diff --git a/lusSTR/gui/home.py b/lusSTR/gui/home.py new file mode 100644 index 0000000..73dc8e0 --- /dev/null +++ b/lusSTR/gui/home.py @@ -0,0 +1,42 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2024, DHS. 
+# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from importlib.resources import files +import streamlit as st + + +class HomePage(): + def display(self): + image_path = files("lusSTR") / "cli" / "logo.png" + left_column, center_column, right_column = st.columns([2.5, 8, 2.5]) + with center_column: + st.image(str(image_path), use_column_width="auto") + + # CSS to hide full-screen button + hide_img_fs = """ + + """ + st.markdown(hide_img_fs, unsafe_allow_html=True) + + st.markdown( + """ + lusSTR is an end-to-end workflow for processing human forensic data (STRs and SNPs) + derived from Next Generation Sequencing (NGS) data for use in probabilistic genotyping + software. For more information on lusSTR, visit our + [GitHub page](https://github.com/bioforensics/lusSTR). + """, + unsafe_allow_html=True, + ) + st.info("Please Select One of the Tabs Above to Get Started on Processing Your Data!") diff --git a/lusSTR/gui/howto.py b/lusSTR/gui/howto.py new file mode 100644 index 0000000..7f9ba79 --- /dev/null +++ b/lusSTR/gui/howto.py @@ -0,0 +1,62 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2024, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. 
+# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +import streamlit as st + + +class HowToPage(): + def display(self): + st.title("Common Errors and Best Practices for Using lusSTR") + st.header("1. File/Folder Path Formatting") + st.write( + "Please ensure that the displayed path accurately reflects your selection. When using " + "the file or folder picker, navigate to the desired location and click 'OK' to " + "confirm your selection." + ) + st.header("2. Specifying Output Prefix") + st.write( + "The purpose of specifying the output prefix is for lusSTR to create result files and " + "folders with that prefix in your working directory. Please ensure that you are " + "following proper file naming formatting and rules when specifying this prefix. Avoid " + "using characters such as '/', '', '.', and others. Note: To avoid potential errors, " + "you can simply use the default placeholder for output." + ) + st.code("Incorrect: 'working_directory/subfolder/subfolder'\nCorrect: output") + st.write( + "Note that some result files may be saved directly in the working directory with the " + "specified prefix, while others will be populated in a folder labeled with the prefix " + "in your working directory." + ) + st.write("Be aware of this behavior when checking for output files.") + st.header("3. Specifying Output Folder") + st.write( + "Please Ensure That You Properly Specify an Output Folder. This is where all lusSTR " + "output files will be saved. To avoid potential errors, specifying a working " + "directory is required." 
+ ) + st.title("About lusSTR") + st.markdown(""" + **_lusSTR Accommodates Four Different Input Formats:_** + + (1) UAS Sample Details Report, UAS Sample Report, and UAS Phenotype Report (for SNP " + "processing) in .xlsx format (a single file or directory containing multiple files) + + (2) STRait Razor v3 output with one sample per file (a single file or directory containing" + " multiple files) + + (3) GeneMarker v2.6 output (a single file or directory containing multiple files) + + (4) Sample(s) sequences in CSV format; first four columns must be Locus, NumReads, Sequence, " + "SampleID; Optional last two columns can be Project and Analysis IDs. + """, + unsafe_allow_html=True, + ) diff --git a/lusSTR/gui/snps.py b/lusSTR/gui/snps.py new file mode 100644 index 0000000..b91529e --- /dev/null +++ b/lusSTR/gui/snps.py @@ -0,0 +1,251 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2024, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from . import select +from .util import generate_config_file, validate_prefix +import streamlit as st +import subprocess + + +def show_SNP_page(): + + st.title("SNP Workflow") + st.info( + "Please Select SNP Settings Below for lusSTR! For Information Regarding the Settings," + " See the How to Use Tab." 
+ ) + + # Input File Specification + st.subheader("Input Files Selection") + + # Ask user if submitting a directory or individual file + st.info( + "Please Indicate If You Are Providing An Individual Input File or a Folder Containing " + "Multiple Input Files" + ) + input_option = st.radio( + "Select Input Option:", ("Individual File", "Folder with Multiple Files") + ) + + # Initialize session state if not already initialized + if "samp_input" not in st.session_state: + st.session_state.samp_input = None + + # Logic for Path Picker based on user's input option + + if input_option == "Folder with Multiple Files": + clicked = st.button("Please Select a Folder") + if clicked: + st.session_state.samp_input = select.folder() + + else: + clicked_file = st.button("Please Select a File") + if clicked_file: + st.session_state.samp_input = select.file() + + # Display The Selected Path + if st.session_state.samp_input: + st.text_input("Location Of Your Input File(s):", st.session_state.samp_input) + + # Store Selected Path to Reference in Config + samp_input = st.session_state.samp_input + + ##################################################################### + # SNP: General Software Settings to Generate Config File # + ##################################################################### + + st.subheader("General Settings") + + col1, col2, col3, col4, col5 = st.columns(5) + + analysis_software = {"UAS": "uas", "STRait Razor v3": "straitrazor"}[ + col1.selectbox( + "Analysis Software", + options=["UAS", "STRait Razor v3"], + help="Indicate the analysis software used prior to lusSTR sex.", + ) + ] + + output = col2.text_input( + "Output File Name", "lusstr_output", help="Please specify a name for the created files." 
+ ) + + kit = {"Signature Prep": "sigprep", "Kintelligence": "kintelligence"}[ + col3.selectbox("Library Preparation Kit", options=["Signature Prep", "Kintelligence"]) + ] + + ##################################################################### + # SNP: Format Settings to Generate Config File # + ##################################################################### + + st.subheader("Convert Settings") + + col1, col2, col3, col4, col5 = st.columns(5) + + # -- Select Type (Unique to SNP Workflow) + types_mapping = { + "Identify SNPs": "i", + "Phenotype SNPs": "p", + "Ancestry SNPs": "a", + "All SNPs": "all", + } + selected_types = col1.multiselect( + "Select SNP Types:", + options=types_mapping.keys(), + help="Select the SNP types to process; can select one or more options", + ) + types_string = ( + "all" + if "All" in selected_types + else ", ".join(types_mapping.get(t, t) for t in selected_types) + ) + + # -- Filter + nofilters = st.checkbox( + "Skip all filtering steps", + help="Specify for no filtering", + ) + + ##################################################################### + # SNP: Convert Settings to Generate Config File # + ##################################################################### + + separate = st.checkbox( + "Create Separate Files for Samples", + help="If want to separate samples into individual files for use in EFM", + ) + + strand = {"UAS Orientation": "uas", "Forward Strand": "forward"}[ + col2.selectbox( + "Strand Orientation", + options=["UAS Orientation", "Forward Strand"], + help="Indicate which orientation to report the alleles for the SigPrep SNPs.", + ) + ] + + # Analytical threshold value + thresh = col3.number_input("Analytical threshold value:", value=0.03, step=0.01, min_value=0.0) + + ##################################################################### + # SNP: Specify a Reference File if User Has One # + ##################################################################### + + col1, col2, col3 = st.columns(3) + + if 
"reference" not in st.session_state: + st.session_state.reference = None + + reference = col1.text_input( + "Please Specify Your Reference Sample IDs", + help="List IDs of the samples to be run as references in EFM; default is no " + "reference samples", + ) + + ##################################################################### + # SNP: Specify Working Directory # + ##################################################################### + + st.subheader("Set Output Folder") + + col1, col2, col3, col4, col5 = st.columns(5) + + # Initialize session state if not already initialized + if "wd_dirname" not in st.session_state: + st.session_state.wd_dirname = None + + clicked_wd = col1.button("Please Select An Output Folder") + if clicked_wd: + st.session_state.wd_dirname = select.folder() + + # Display selected path + if st.session_state.wd_dirname: + st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname) + + ##################################################################### + # SNP: Generate Config File Based on Settings # + ##################################################################### + + # Submit Button Instance + if st.button("Submit"): + + # Check if all required fields are filled + if analysis_software and samp_input and output and wd_dirname: + + # Validate output prefix + if not validate_prefix(output): + st.warning( + "Please enter a valid output prefix. Only alphanumeric characters, " + "underscore, and hyphen are allowed." 
+ ) + st.stop() # Stop execution if prefix is invalid + + # Display loading spinner (Continuing Process Checks Above Were Passed) + with st.spinner("Processing Your Data..."): + + # Construct config data + + config_data = { + "analysis_software": analysis_software, + "samp_input": samp_input, + "output": output, + "kit": kit, + "types": types_string, + "thresh": thresh, + "separate": separate, + "nofilter": nofilters, + "strand": strand, + "references": None, # Default value is None + } + + # If a reference file was specified, add to config + if reference: + config_data["references"] = st.session_state.reference + + # Generate YAML config file + generate_config_file(config_data, st.session_state.wd_dirname, "SNP") + + # Subprocess lusSTR commands + command = ["lusstr", "snps", "all"] + + # Specify WD to lusSTR + if wd_dirname: + command.extend(["-w", st.session_state.wd_dirname + "/"]) + + # Run lusSTR command in terminal + try: + subprocess.run(command, check=True) + st.success( + "Config File Generated and lusSTR Executed Successfully! Output Files " + "Have Been Saved to Your Designated Directory and Labeled with your " + "Specified Prefix" + ) + except subprocess.CalledProcessError as e: + st.error(f"Error: {e}") + st.info( + "Please make sure to check the 'How to Use' tab for common error " + "resolutions." + ) + + else: + st.warning( + "Please make sure to fill out all required fields (Analysis Software, Input " + "Directory or File, Prefix for Output, and Specification of Working Directory) " + "before submitting." + ) + + +if __name__ == "__main__": + main() + + +def subparser(subparsers): + subparsers.add_parser("gui", description="Launch the lusSTR GUI") diff --git a/lusSTR/gui/strs.py b/lusSTR/gui/strs.py new file mode 100644 index 0000000..616b75e --- /dev/null +++ b/lusSTR/gui/strs.py @@ -0,0 +1,498 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2024, DHS. 
+# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from . import select +from .util import generate_config_file, validate_prefix +from datetime import datetime +from lusSTR.wrappers.filter import get_at, EFM_output, marker_plots, STRmix_output +import math +import numpy as np +import pandas as pd +from pathlib import Path +import plotly.express as px +import streamlit as st +import yaml +import subprocess +import os + + +class STRWorkflow(): + def __init__(self): + self.samp_input = None + self.wd_dirname = None + + def display(self): + st.title("STR Workflow") + st.info( + "Please Select STR Settings Below for lusSTR. For information regarding the settings, see the How to Use tab." 
+ ) + self.str_input() + self.str_output() + self.str_general_settings() + self.str_filter_settings() + self.str_footer() + + def str_input(self): + st.subheader("Specify Worfklow Inputs") + st.info( + "Indicate whether you are providing an individual file or a folder containing multiple files" + ) + if "samp_input" not in st.session_state: + st.session_state.samp_input = None + input_option = st.radio( + "Select Input Option:", ("Individual file", "Folder with multiple files") + ) + if input_option == "Folder with Multiple Files": + clicked = st.button("Select a folder") + if clicked: + st.session_state.samp_input = select.folder() + else: + clicked_file = st.button("Select a file") + if clicked_file: + st.session_state.samp_input = select.file() + if st.session_state.samp_input: + st.text_input("Location of your input file(s):", st.session_state.samp_input) + self.samp_input = st.session_state.samp_input + + def str_output(self): + st.subheader("Specify Working Directory for Workflow Outputs") + columns = st.columns(5) + if "wd_dirname" not in st.session_state: + st.session_state.wd_dirname = None + clicked_wd = columns[0].button("Select Output Folder") + if clicked_wd: + st.session_state.wd_dirname = select.folder() + if st.session_state.wd_dirname: + st.text_input("Working directory:", st.session_state.wd_dirname) + self.wd_dirname = st.session_state.wd_dirname + + def str_general_settings(self): + st.subheader("General Settings") + columns = st.columns(5) + if "analysis_software" not in st.session_state: + st.session_state.analysis_software = None + selected_software = columns[0].selectbox( + "Analysis Software", + options=["UAS", "STRait Razor v3", "GeneMarker HTS"], + help="Indicate the analysis software used prior to lusSTR.", + ) + software = { + "UAS": "uas", + "STRait Razor v3": "straitrazor", + "GeneMarker HTS": "genemarker", + } + st.session_state.analysis_software = software[selected_software] + if "custom_ranges" not in st.session_state: + 
st.session_state.custom_ranges = None + st.session_state.custom_ranges = st.checkbox( + "Use Custom Sequence Ranges", + help="Check the box to use the specified custom sequence ranges as defined in the `str_markers.json` file.", + ) + if "sex" not in st.session_state: + st.session_state.sex = None + st.session_state.sex = st.checkbox( + "Include X- and Y-STRs", + help="Check the box to include X- and Y-STRs, otherwise leave unchecked.", + ) + if "kit" not in st.session_state: + st.session_state.kit = None + selected_kit = columns[1].selectbox( + "Library Preparation Kit", + options=["ForenSeq Signature Prep", "PowerSeq 46GY"], + help="Specify the library preparation kit used to generate the sequences.", + ) + kits = {"ForenSeq Signature Prep": "forenseq", "PowerSeq 46GY": "powerseq"} + st.session_state.kit = kits[selected_kit] + if "output" not in st.session_state: + st.session_state.output = None + st.session_state.output = columns[2].text_input( + "Output File Name", + "lusstr_output", + help="Please specify a name for the created files. It can only contain alphanumeric characters, underscores and hyphens. No spaces allowed.", + ) + if "nocombine" not in st.session_state: + st.session_state.nocombine = None + st.session_state.nocombine = st.checkbox( + "Do Not Combine Identical Sequences", + help="If using STRait Razor data, by default, identical sequences (after removing flanking sequences) are combined and reads are summed. 
Checking this will not combine identical sequences.", + ) + + def str_filter_settings(self): + st.subheader("Filter Settings") + columns = st.columns(5) + if "output_type" not in st.session_state: + st.session_state.output_type = None + st.session_state.output_type = { + "STRmix": "strmix", + "EuroForMix": "efm", + "MPSproto": "mpsproto", + }[ + columns[0].selectbox( + "Probabilistic Genotyping Software", + options=["STRmix", "EuroForMix", "MPSproto"], + help="Select which probabilistic genotyping software files to create", + ) + ] + if "profile_type" not in st.session_state: + st.session_state.profile_type = None + st.session_state.profile_type = {"Evidence": "evidence", "Reference": "reference"}[ + columns[1].selectbox( + "Profile Type", + options=["Evidence", "Reference"], + help="Select the file type (format) to create for the probabilistic genotyping software.", + ) + ] + if "data_type" not in st.session_state: + st.session_state.data_type = None + st.session_state.data_type = { + "Sequence": "ngs", + "CE allele": "ce", + "LUS+ allele": "lusplus", + }[ + columns[2].selectbox( + "Data Type", + options=["Sequence", "CE allele", "LUS+ allele"], + help="Select the allele type used to determine sequence type (belowAT, stutter or typed) and used in the final output file.", + ) + ] + if "info" not in st.session_state: + st.session_state.info = None + st.session_state.info = st.checkbox( + "Create Allele Information File", + value=True, + help="Create file containing information about each sequence, including sequence type (belowAT, stutter or typed), stuttering sequence information and metrics involving stutter and noise.", + ) + if "separate" not in st.session_state: + st.session_state.separate = None + st.session_state.separate = st.checkbox( + "Create Separate Files for Samples", + help="If checked, will create individual files for samples; If unchecked, will create one file with all samples.", + ) + if "nofilters" not in st.session_state: + 
st.session_state.nofilters = None + st.session_state.nofilters = st.checkbox( + "Skip All Filtering Steps", + help="Filtering will not be performed but will still create EFM/MPSproto/STRmix output files containing all sequences.", + ) + if "strand" not in st.session_state: + st.session_state.strand = None + st.session_state.strand = {"UAS Orientation": "uas", "Forward Strand": "forward"}[ + columns[3].selectbox( + "Strand Orientation", + options=["Forward Strand", "UAS Orientation"], + help="Indicates the strand orientation in which to report the sequence in the final output table as some markers are reported in the UAS on the reverse strand. Selecting the UAS Orientation will report those markers on the reverse strand while the remaining will be reported on the forward strand. Selecting the Forward Strand will report all markers on the forward strand orientation. This applies to STRmix NGS only.", + ) + ] + + def str_launch_workflow(self): + if st.button("Run lusSTR"): + if ( + st.session_state.analysis_software + and st.session_state.samp_input + and st.session_state.output + and st.session_state.wd_dirname + ): + if not validate_prefix(st.session_state.output): + st.warning( + "Please enter a valid output prefix. Only alphanumeric characters, underscore, and hyphen are allowed." 
+ ) + st.stop() + with st.spinner("Processing your data..."): + config_data = { + "analysis_software": st.session_state.analysis_software, + "custom_ranges": st.session_state.custom_ranges, + "sex": st.session_state.sex, + "samp_input": st.session_state.samp_input, + "output": st.session_state.output, + "kit": st.session_state.kit, + "nocombine": st.session_state.nocombine, + "output_type": st.session_state.output_type, + "profile_type": st.session_state.profile_type, + "data_type": st.session_state.data_type, + "info": st.session_state.info, + "separate": st.session_state.separate, + "nofilters": st.session_state.nofilters, + "strand": st.session_state.strand, + } + generate_config_file(config_data, st.session_state.wd_dirname, "STR") + command = ["lusstr", "strs", "all"] + if self.wd_dirname: + command.extend(["-w", st.session_state.wd_dirname + "/"]) + try: + subprocess.run(command, check=True) + st.success( + "Config file generated and lusSTR executed successfully! Output files have been saved to your designated directory and labeled with your specified prefix." + ) + except subprocess.CalledProcessError as e: + st.error(f"Error: {e}") + st.info( + "Please make sure to check the 'How to Use' tab for common error resolutions." + ) + else: + st.warning( + "Please make sure to fill out all required fields (Analysis Software, Input Directory or File, Prefix for Output, and Specification of Working Directory) before submitting." + ) + + def str_footer(self): + st.write("---") + st.write( + "After running lusSTR, or if lusSTR has been run previously, the user may view and edit the individual STR marker plots and data." + ) + st.write( + "If lusSTR has been previously run, only the above ```Output Folder``` containing the run files needs to be specified. Other settings will be automatically loaded from the config.yaml file within the specified folder." 
+ ) + if "interactive" not in st.session_state: + st.session_state.interactive = None + if st.button("See Individual Marker Plots & Data") or st.session_state.interactive: + st.session_state.interactive = True + create_settings() + if st.session_state.custom_ranges: + file = f"{st.session_state.wd_dirname}/{st.session_state.output}/{st.session_state.output}_custom_range" + else: + file = f"{self.wd_dirname}/{st.session_state.output}/{st.session_state.output}" + try: + sequence_info = pd.read_csv(f"{file}_sequence_info.csv") + interactive_setup(sequence_info, file) + except FileNotFoundError: + print( + f"{file}_sequence_info.csv not found. Please check output folder specification." + ) + + +def df_on_change(locus): + state = st.session_state[f"{locus}_edited"] + for index, updates in state["edited_rows"].items(): + st.session_state[locus].loc[st.session_state[locus].index == index, "edited"] = True + for key, value in updates.items(): + st.session_state[locus].loc[st.session_state[locus].index == index, key] = value + + +def interactive_plots_allmarkers(sample_df, flagged_df): + cols = st.columns(4) + max_reads = max(sample_df["Reads"]) + n = 100 if max_reads > 1000 else 10 + max_yvalue = int(math.ceil(max_reads / n)) * n + increase_value = int(math.ceil((max_yvalue / 5)) / n) * n + n = 0 + for marker in sample_df["Locus"].unique(): + col = cols[n] + container = col.container(border=True) + sample_locus = sample_df["SampleID"].unique() + "_" + marker + marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele") + if sample_locus in flagged_df["key"].values: + marker = f"⚠️{marker}⚠️" + plot = interactive_plots(marker_df, marker, max_yvalue, increase_value, all=True) + container.plotly_chart(plot, use_container_width=True) + if n == 3: + n = 0 + else: + n += 1 + + +def interactive_plots(df, locus, ymax, increase, all=False): + if "⚠️" in locus: + locus_at = locus.replace("⚠️", "") + else: + locus_at = locus + at = get_at(df, locus_at) + for i, 
row in df.iterrows(): + if "stutter" in df.loc[i, "allele_type"]: + df.loc[i, "Label"] = "Stutter" + else: + df.loc[i, "Label"] = df.loc[i, "allele_type"] + min_x = round(min(df["CE_Allele"]) - 1) + max_x = round(max(df["CE_Allele"]) + 1) + plot = px.bar( + df, + x="CE_Allele", + y="Reads", + color="Label", + color_discrete_map={ + "Typed": "green", + "BelowAT": "red", + "Stutter": "blue", + "Deleted": "purple", + }, + title=locus, + ) + plot.add_hline(y=at, line_width=3, line_dash="dot", line_color="gray") + plot.add_annotation(text="AT", x=min_x + 0.1, y=at, showarrow=False, yshift=10) + plot.update_layout( + xaxis=dict(range=[min_x, max_x], tickmode="array", tickvals=np.arange(min_x, max_x, 1)) + ) + if all: + plot.update_layout( + yaxis=dict(range=[0, ymax], tickmode="array", tickvals=np.arange(0, ymax, increase)) + ) + return plot + + +def remake_final_files(full_df, outpath): + if st.session_state.custom_ranges: + seq_col = "Custom_Range_Sequence" + brack_col = "Custom_Bracketed_Notation" + else: + seq_col = ( + "UAS_Output_Sequence" + if st.session_state.strand == "uas" + else "Forward_Strand_Sequence" + ) + brack_col = ( + "UAS_Output_Bracketed_Notation" + if st.session_state.strand == "uas" + else "Forward_Strand_Bracketed_Notation" + ) + if st.session_state.nofilters: + full_df["allele_type"] = "Typed" + if st.session_state.output_type == "efm" or st.session_state.output_type == "mpsproto": + EFM_output( + full_df, + outpath, + st.session_state.profile_type, + st.session_state.data_type, + brack_col, + st.session_state.sex, + st.session_state.separate, + ) + else: + STRmix_output( + full_df, outpath, st.session_state.profile_type, st.session_state.data_type, seq_col + ) + + +def interactive_setup(df1, file): + col1, col2, col3, col4, col5 = st.columns(5) + sample = col1.selectbox("Select Sample:", options=df1["SampleID"].unique()) + sample_df = df1[df1["SampleID"] == sample].reset_index(drop=True) + locus_list = pd.concat([pd.Series("All Markers"), 
sample_df["Locus"].drop_duplicates()]) + if os.path.isfile(f"{file}_Flagged_Loci.csv"): + flags = pd.read_csv(f"{file}_Flagged_Loci.csv") + else: + flags = pd.DataFrame(columns=["key", "SampleID", "Locus"]) + flags["key"] = flags["SampleID"] + "_" + flags["Locus"] + flags_sample = flags[flags["SampleID"] == sample].reset_index(drop=True) + for flagged_locus in flags_sample["Locus"].unique(): + locus_list = locus_list.str.replace(flagged_locus, f"⚠️{flagged_locus}⚠️") + locus = col2.selectbox("Select Marker:", options=locus_list) + if "⚠️" in locus: + locus = locus.replace("⚠️", "") + if locus == "All Markers": + if not flags_sample.empty: + st.write("⚠️ indicates potential problems with the marker. Examine the individual marker plots for more information.") + interactive_plots_allmarkers(sample_df, flags) + else: + locus_key = f"{sample}_{locus}" + if locus_key not in st.session_state: + st.session_state[locus_key] = sample_df[sample_df["Locus"] == locus].reset_index( + drop=True + ) + Type = [ + "Deleted", + "Typed", + "-1_stutter", + "-2_stutter", + "BelowAT", + "-1_stutter/+1_stutter", + "+1_stutter", + ] + plot = interactive_plots(st.session_state[locus_key], locus, None, None) + st.plotly_chart(plot, use_container_width=True) + col1, col2, col3 = st.columns(3) + if locus_key in flags["key"].values: + locus_flags = flags[flags["key"] == locus_key] + for flag in locus_flags["Flags"].unique(): + col2.write(f"⚠️ Potential issue: {flag} identified!") + st.data_editor( + data=st.session_state[locus_key], + disabled=( + "SampleID", + "Locus", + "UAS_Output_Sequence", + "CE_Allele", + "UAS_Output_Bracketed_Notation", + "Custom_Range_Sequence", + "Custom_Bracketed_Notation", + "Reads", + "parent_allele1", + "parent_allele2", + "allele1_ref_reads", + "allele2_ref_reads", + "perc_noise", + "perc_stutter", + ), + column_config={ + "allele_type": st.column_config.SelectboxColumn("allele_type", options=Type) + }, + hide_index=True, + key=f"{locus_key}_edited", + 
on_change=df_on_change, + args=(locus_key,), + ) + if st.button("Save Edits"): + ph = st.empty() + with ph.container(): + st.write("Saving Changes - May take a minute or two.") + combined_df = pd.DataFrame() + for sample in df1["SampleID"].unique(): + sample_df = df1[df1["SampleID"] == sample].reset_index(drop=True) + for locus in sample_df["Locus"].unique(): + locus_key = f"{sample}_{locus}" + try: + combined_df = pd.concat([combined_df, st.session_state[locus_key]]) + except KeyError: + combined_df = pd.concat( + [ + combined_df, + sample_df[sample_df["Locus"] == locus].reset_index(drop=True), + ] + ) + now = datetime.now() + dt = now.strftime("%m%d%Y_%H_%M_%S") + del combined_df["Label"] + Path(f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}").mkdir( + parents=True, exist_ok=True + ) + outpath = f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}/" + marker_plots(combined_df, f"{st.session_state.output}_edited_{dt}", sex=False, wd=outpath) + combined_df.to_csv( + f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}/" + f"{st.session_state.output}_sequence_info_edited_{dt}.csv", + index=False, + ) + new_text = ( + f"Changes saved to {st.session_state.wd_dirname}/{st.session_state.output}" + f"/edited_{dt}/{st.session_state.output}_sequence_info_edited_{dt}.csv" + f"New {st.session_state.output_type} files created in {st.session_state.wd_dirname}" + f"/{st.session_state.output}/edited_{dt}/ folder" + ) + remake_final_files(combined_df, outpath) + ph.empty() + with ph.container(): + st.write( + f"New files and marker plots with edits saved to {st.session_state.wd_dirname}/" + f"{st.session_state.output}/edited_{dt}/" + ) + + +def create_settings(): + if os.path.isfile(f"{st.session_state.wd_dirname}/config.yaml"): + st.write(f"Loading settings from {st.session_state.wd_dirname}/config.yaml") + with open(f"{st.session_state.wd_dirname}/config.yaml", "r") as file: + config_settings = yaml.safe_load(file) + 
st.session_state.output = config_settings["output"] + st.session_state.custom_ranges = config_settings["custom_ranges"] + st.session_state.profile_type = config_settings["profile_type"] + st.session_state.data_type = config_settings["data_type"] + st.session_state.sex = config_settings["sex"] + st.session_state.separate = config_settings["separate"] + st.session_state.strand = config_settings["strand"] + st.session_state.output_type = config_settings["output_type"] diff --git a/lusSTR/gui/util.py b/lusSTR/gui/util.py new file mode 100644 index 0000000..83f4271 --- /dev/null +++ b/lusSTR/gui/util.py @@ -0,0 +1,31 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2024, DHS. +# +# This file is part of lusSTR (http://github.com/bioforensics/lusSTR) and is licensed under +# the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from pathlib import Path +import re +import yaml + + +def generate_config_file(config_data, working_directory, workflow_type): + if workflow_type == "STR": + config_filename = "config.yaml" + elif workflow_type == "SNP": + config_filename = "snp_config.yaml" + else: + raise ValueError("Invalid workflow type. 
Please specify either 'STR' or 'SNP'.") + config_path = Path(working_directory) / config_filename + with open(config_path, "w") as file: + yaml.dump(config_data, file) + + +def validate_prefix(prefix): + return re.match(r"^[A-Za-z0-9_-]+$", prefix) is not None From 1714265e9f0192ea4d13a149fb98e7e4e13e999b Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Fri, 18 Oct 2024 16:11:45 -0400 Subject: [PATCH 3/3] More reorganization [skip ci] --- Makefile | 4 +- lusSTR/cli/gui.py | 15 +- lusSTR/gui/__init__.py | 24 +- lusSTR/gui/contact.py | 9 +- lusSTR/gui/home.py | 47 ++-- lusSTR/gui/howto.py | 76 +++---- lusSTR/gui/snps.py | 50 ++--- lusSTR/gui/strs.py | 474 +++++++++++++++++++-------------------- lusSTR/tests/__init__.py | 4 +- 9 files changed, 339 insertions(+), 364 deletions(-) diff --git a/Makefile b/Makefile index f125014..8637307 100755 --- a/Makefile +++ b/Makefile @@ -10,11 +10,11 @@ test: ## style: check code style style: - black --line-length=99 --check *.py lusSTR/cli/*.py lusSTR/scripts/*.py lusSTR/wrappers/*.py lusSTR/tests/test_*.py + black --line-length=99 --check *.py lusSTR/*/*.py ## format: auto-reformat code with Black format: - black --line-length=99 *.py lusSTR/cli/*.py lusSTR/scripts/*.py lusSTR/wrappers/*.py lusSTR/tests/test_*.py + black --line-length=99 *.py lusSTR/*/*.py ## devenv: configure a development environment devenv: diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py index f188a1f..79a6ab0 100644 --- a/lusSTR/cli/gui.py +++ b/lusSTR/cli/gui.py @@ -11,20 +11,11 @@ # ------------------------------------------------------------------------------------------------- from lusSTR.gui import initialize -from lusSTR.gui.snps import show_SNP_page -def main(): - app = initialize() - if app is None: - show_SNP_page() - else: - app().display() +def subparser(subparsers): + subparsers.add_parser("gui", description="Launch the lusSTR GUI") if __name__ == "__main__": - main() - - -def subparser(subparsers): - subparsers.add_parser("gui", 
description="Launch the lusSTR GUI") + initialize() diff --git a/lusSTR/gui/__init__.py b/lusSTR/gui/__init__.py index 3555b68..3eae25c 100644 --- a/lusSTR/gui/__init__.py +++ b/lusSTR/gui/__init__.py @@ -10,20 +10,21 @@ # Development Center. # ------------------------------------------------------------------------------------------------- -from .home import HomePage -from .howto import HowToPage -from .contact import ContactPage -from .strs import STRWorkflow +from .contact import contact_page_display +from .home import home_page_display +from .howto import howto_page_display +from .snps import snp_workflow_display +from .strs import str_workflow_display import streamlit as st from streamlit_option_menu import option_menu -apps = { - "Home": HomePage, - "STRs": STRWorkflow, - "SNPs": None, - "How to Use": HowToPage, - "Contact": ContactPage, +pages = { + "Home": home_page_display, + "STRs": str_workflow_display, + "SNPs": snp_workflow_display, + "How to Use": howto_page_display, + "Contact": contact_page_display, } @@ -38,4 +39,5 @@ def initialize(): orientation="horizontal", ) appname = str(selected) - return apps[appname] + renderer = pages[appname] + renderer() diff --git a/lusSTR/gui/contact.py b/lusSTR/gui/contact.py index 7581885..5fdc596 100644 --- a/lusSTR/gui/contact.py +++ b/lusSTR/gui/contact.py @@ -13,7 +13,8 @@ import streamlit as st -class ContactPage(): - def display(self): - st.title("Contact Us") - st.write("For any questions or issues, please contact rebecca.mitchell@st.dhs.gov or daniel.standage@st.dhs.gov.") +def contact_page_display(): + st.title("Contact Us") + st.write( + "For any questions or issues, please contact rebecca.mitchell@st.dhs.gov or daniel.standage@st.dhs.gov." 
+ ) diff --git a/lusSTR/gui/home.py b/lusSTR/gui/home.py index 73dc8e0..eb883ec 100644 --- a/lusSTR/gui/home.py +++ b/lusSTR/gui/home.py @@ -14,29 +14,26 @@ import streamlit as st -class HomePage(): - def display(self): - image_path = files("lusSTR") / "cli" / "logo.png" - left_column, center_column, right_column = st.columns([2.5, 8, 2.5]) - with center_column: - st.image(str(image_path), use_column_width="auto") - - # CSS to hide full-screen button - hide_img_fs = """ - +def home_page_display(): + image_path = files("lusSTR") / "cli" / "logo.png" + left_column, center_column, right_column = st.columns([2.5, 8, 2.5]) + with center_column: + st.image(str(image_path), use_column_width="auto") + # CSS to hide full-screen button + hide_img_fs = """ + + """ + st.markdown(hide_img_fs, unsafe_allow_html=True) + st.markdown( """ - st.markdown(hide_img_fs, unsafe_allow_html=True) - - st.markdown( - """ - lusSTR is an end-to-end workflow for processing human forensic data (STRs and SNPs) - derived from Next Generation Sequencing (NGS) data for use in probabilistic genotyping - software. For more information on lusSTR, visit our - [GitHub page](https://github.com/bioforensics/lusSTR). - """, - unsafe_allow_html=True, - ) - st.info("Please Select One of the Tabs Above to Get Started on Processing Your Data!") + lusSTR is an end-to-end workflow for processing human forensic data (STRs and SNPs) + derived from Next Generation Sequencing (NGS) data for use in probabilistic genotyping + software. For more information on lusSTR, visit our + [GitHub page](https://github.com/bioforensics/lusSTR). 
+ """, + unsafe_allow_html=True, + ) + st.info("Please Select One of the Tabs Above to Get Started on Processing Your Data!") diff --git a/lusSTR/gui/howto.py b/lusSTR/gui/howto.py index 7f9ba79..268dc06 100644 --- a/lusSTR/gui/howto.py +++ b/lusSTR/gui/howto.py @@ -13,50 +13,40 @@ import streamlit as st -class HowToPage(): - def display(self): - st.title("Common Errors and Best Practices for Using lusSTR") - st.header("1. File/Folder Path Formatting") - st.write( - "Please ensure that the displayed path accurately reflects your selection. When using " - "the file or folder picker, navigate to the desired location and click 'OK' to " - "confirm your selection." - ) - st.header("2. Specifying Output Prefix") - st.write( - "The purpose of specifying the output prefix is for lusSTR to create result files and " - "folders with that prefix in your working directory. Please ensure that you are " - "following proper file naming formatting and rules when specifying this prefix. Avoid " - "using characters such as '/', '', '.', and others. Note: To avoid potential errors, " - "you can simply use the default placeholder for output." - ) - st.code("Incorrect: 'working_directory/subfolder/subfolder'\nCorrect: output") - st.write( - "Note that some result files may be saved directly in the working directory with the " - "specified prefix, while others will be populated in a folder labeled with the prefix " - "in your working directory." - ) - st.write("Be aware of this behavior when checking for output files.") - st.header("3. Specifying Output Folder") - st.write( - "Please Ensure That You Properly Specify an Output Folder. This is where all lusSTR " - "output files will be saved. To avoid potential errors, specifying a working " - "directory is required." - ) - st.title("About lusSTR") - st.markdown(""" - **_lusSTR Accommodates Four Different Input Formats:_** +def howto_page_display(): + st.title("Common Errors and Best Practices for Using lusSTR") + st.header("1. 
File/Folder Path Formatting") + st.write( + "Please ensure that the displayed path accurately reflects your selection. When using the file or folder picker, navigate to the desired location and click 'OK' to confirm your selection." + ) + st.header("2. Specifying Output Prefix") + st.write( + "The purpose of specifying the output prefix is for lusSTR to create result files and folders with that prefix in your working directory. Please ensure that you are following proper file naming formatting and rules when specifying this prefix. Avoid using characters such as '/', '', '.', and others. Note: To avoid potential errors, you can simply use the default placeholder for output." + ) + st.code("Incorrect: 'working_directory/subfolder/subfolder'\nCorrect: output") + st.write( + "Note that some result files may be saved directly in the working directory with the specified prefix, while others will be populated in a folder labeled with the prefix in your working directory." + ) + st.write("Be aware of this behavior when checking for output files.") + st.header("3. Specifying Output Folder") + st.write( + "Please Ensure That You Properly Specify an Output Folder. This is where all lusSTR output files will be saved. To avoid potential errors, specifying a working directory is required." 
+ ) + st.title("About lusSTR") + st.markdown( + """ + **_lusSTR Accommodates Four Different Input Formats:_** - (1) UAS Sample Details Report, UAS Sample Report, and UAS Phenotype Report (for SNP " - "processing) in .xlsx format (a single file or directory containing multiple files) + (1) UAS Sample Details Report, UAS Sample Report, and UAS Phenotype Report (for SNP " + "processing) in .xlsx format (a single file or directory containing multiple files) - (2) STRait Razor v3 output with one sample per file (a single file or directory containing" - " multiple files) + (2) STRait Razor v3 output with one sample per file (a single file or directory containing" + " multiple files) - (3) GeneMarker v2.6 output (a single file or directory containing multiple files) + (3) GeneMarker v2.6 output (a single file or directory containing multiple files) - (4) Sample(s) sequences in CSV format; first four columns must be Locus, NumReads, Sequence, " - "SampleID; Optional last two columns can be Project and Analysis IDs. - """, - unsafe_allow_html=True, - ) + (4) Sample(s) sequences in CSV format; first four columns must be Locus, NumReads, Sequence, " + "SampleID; Optional last two columns can be Project and Analysis IDs. + """, + unsafe_allow_html=True, + ) diff --git a/lusSTR/gui/snps.py b/lusSTR/gui/snps.py index b91529e..23a6053 100644 --- a/lusSTR/gui/snps.py +++ b/lusSTR/gui/snps.py @@ -16,8 +16,7 @@ import subprocess -def show_SNP_page(): - +def snp_workflow_display(): st.title("SNP Workflow") st.info( "Please Select SNP Settings Below for lusSTR! 
For Information Regarding the Settings," @@ -37,27 +36,27 @@ def show_SNP_page(): ) # Initialize session state if not already initialized - if "samp_input" not in st.session_state: - st.session_state.samp_input = None + if "samp_input_snp" not in st.session_state: + st.session_state.samp_input_snp = None # Logic for Path Picker based on user's input option if input_option == "Folder with Multiple Files": clicked = st.button("Please Select a Folder") if clicked: - st.session_state.samp_input = select.folder() + st.session_state.samp_input_snp = select.folder() else: clicked_file = st.button("Please Select a File") if clicked_file: - st.session_state.samp_input = select.file() + st.session_state.samp_input_snp = select.file() # Display The Selected Path - if st.session_state.samp_input: - st.text_input("Location Of Your Input File(s):", st.session_state.samp_input) + if st.session_state.samp_input_snp: + st.text_input("Location Of Your Input File(s):", st.session_state.samp_input_snp) # Store Selected Path to Reference in Config - samp_input = st.session_state.samp_input + samp_input_snp = st.session_state.samp_input_snp ##################################################################### # SNP: General Software Settings to Generate Config File # @@ -159,16 +158,16 @@ def show_SNP_page(): col1, col2, col3, col4, col5 = st.columns(5) # Initialize session state if not already initialized - if "wd_dirname" not in st.session_state: - st.session_state.wd_dirname = None + if "wd_dirname_snp" not in st.session_state: + st.session_state.wd_dirname_snp = None clicked_wd = col1.button("Please Select An Output Folder") if clicked_wd: - st.session_state.wd_dirname = select.folder() + st.session_state.wd_dirname_snp = select.folder() # Display selected path - if st.session_state.wd_dirname: - st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname) + if st.session_state.wd_dirname_snp: + st.text_input("Your Specified Output Folder:", 
st.session_state.wd_dirname_snp) ##################################################################### # SNP: Generate Config File Based on Settings # @@ -178,7 +177,12 @@ def show_SNP_page(): if st.button("Submit"): # Check if all required fields are filled - if analysis_software and samp_input and output and wd_dirname: + if ( + analysis_software + and st.session_state.samp_input_snp + and output + and st.session_state.wd_dirname_snp + ): # Validate output prefix if not validate_prefix(output): @@ -195,7 +199,7 @@ def show_SNP_page(): config_data = { "analysis_software": analysis_software, - "samp_input": samp_input, + "samp_input_snp": samp_input_snp, "output": output, "kit": kit, "types": types_string, @@ -211,14 +215,14 @@ def show_SNP_page(): config_data["references"] = st.session_state.reference # Generate YAML config file - generate_config_file(config_data, st.session_state.wd_dirname, "SNP") + generate_config_file(config_data, st.session_state.wd_dirname_snp, "SNP") # Subprocess lusSTR commands command = ["lusstr", "snps", "all"] # Specify WD to lusSTR - if wd_dirname: - command.extend(["-w", st.session_state.wd_dirname + "/"]) + if st.session_state.wd_dirname_snp: + command.extend(["-w", st.session_state.wd_dirname_snp + "/"]) # Run lusSTR command in terminal try: @@ -241,11 +245,3 @@ def show_SNP_page(): "Directory or File, Prefix for Output, and Specification of Working Directory) " "before submitting." ) - - -if __name__ == "__main__": - main() - - -def subparser(subparsers): - subparsers.add_parser("gui", description="Launch the lusSTR GUI") diff --git a/lusSTR/gui/strs.py b/lusSTR/gui/strs.py index 616b75e..74a21c6 100644 --- a/lusSTR/gui/strs.py +++ b/lusSTR/gui/strs.py @@ -25,247 +25,243 @@ import os -class STRWorkflow(): - def __init__(self): - self.samp_input = None - self.wd_dirname = None +def str_workflow_display(): + st.title("STR Workflow") + st.info( + "Please Select STR Settings Below for lusSTR. 
For information regarding the settings, see the How to Use tab." + ) + str_input() + str_output() + str_general_settings() + str_filter_settings() + str_footer() - def display(self): - st.title("STR Workflow") - st.info( - "Please Select STR Settings Below for lusSTR. For information regarding the settings, see the How to Use tab." - ) - self.str_input() - self.str_output() - self.str_general_settings() - self.str_filter_settings() - self.str_footer() - def str_input(self): - st.subheader("Specify Worfklow Inputs") - st.info( - "Indicate whether you are providing an individual file or a folder containing multiple files" - ) - if "samp_input" not in st.session_state: - st.session_state.samp_input = None - input_option = st.radio( - "Select Input Option:", ("Individual file", "Folder with multiple files") - ) - if input_option == "Folder with Multiple Files": - clicked = st.button("Select a folder") - if clicked: - st.session_state.samp_input = select.folder() - else: - clicked_file = st.button("Select a file") - if clicked_file: - st.session_state.samp_input = select.file() - if st.session_state.samp_input: - st.text_input("Location of your input file(s):", st.session_state.samp_input) - self.samp_input = st.session_state.samp_input +def str_input(): + st.subheader("Specify Worfklow Inputs") + st.info( + "Indicate whether you are providing an individual file or a folder containing multiple files" + ) + if "samp_input_str" not in st.session_state: + st.session_state.samp_input_str = None + input_option = st.radio( + "Select Input Option:", ("Individual file", "Folder with multiple files") + ) + if input_option == "Folder with Multiple Files": + clicked = st.button("Select a folder") + if clicked: + st.session_state.samp_input_str = select.folder() + else: + clicked_file = st.button("Select a file") + if clicked_file: + st.session_state.samp_input_str = select.file() + if st.session_state.samp_input_str: + st.text_input("Location of your input file(s):", 
st.session_state.samp_input_str) - def str_output(self): - st.subheader("Specify Working Directory for Workflow Outputs") - columns = st.columns(5) - if "wd_dirname" not in st.session_state: - st.session_state.wd_dirname = None - clicked_wd = columns[0].button("Select Output Folder") - if clicked_wd: - st.session_state.wd_dirname = select.folder() - if st.session_state.wd_dirname: - st.text_input("Working directory:", st.session_state.wd_dirname) - self.wd_dirname = st.session_state.wd_dirname - def str_general_settings(self): - st.subheader("General Settings") - columns = st.columns(5) - if "analysis_software" not in st.session_state: - st.session_state.analysis_software = None - selected_software = columns[0].selectbox( - "Analysis Software", - options=["UAS", "STRait Razor v3", "GeneMarker HTS"], - help="Indicate the analysis software used prior to lusSTR.", - ) - software = { - "UAS": "uas", - "STRait Razor v3": "straitrazor", - "GeneMarker HTS": "genemarker", - } - st.session_state.analysis_software = software[selected_software] - if "custom_ranges" not in st.session_state: - st.session_state.custom_ranges = None - st.session_state.custom_ranges = st.checkbox( - "Use Custom Sequence Ranges", - help="Check the box to use the specified custom sequence ranges as defined in the `str_markers.json` file.", - ) - if "sex" not in st.session_state: - st.session_state.sex = None - st.session_state.sex = st.checkbox( - "Include X- and Y-STRs", - help="Check the box to include X- and Y-STRs, otherwise leave unchecked.", +def str_output(): + st.subheader("Specify Working Directory for Workflow Outputs") + columns = st.columns(5) + if "wd_dirname_str" not in st.session_state: + st.session_state.wd_dirname_str = None + clicked_wd = columns[0].button("Select Output Folder") + if clicked_wd: + st.session_state.wd_dirname_str = select.folder() + if st.session_state.wd_dirname_str: + st.text_input("Working directory:", st.session_state.wd_dirname_str) + + +def 
str_general_settings(): + st.subheader("General Settings") + columns = st.columns(5) + if "analysis_software" not in st.session_state: + st.session_state.analysis_software = None + selected_software = columns[0].selectbox( + "Analysis Software", + options=["UAS", "STRait Razor v3", "GeneMarker HTS"], + help="Indicate the analysis software used prior to lusSTR.", + ) + software = { + "UAS": "uas", + "STRait Razor v3": "straitrazor", + "GeneMarker HTS": "genemarker", + } + st.session_state.analysis_software = software[selected_software] + if "custom_ranges" not in st.session_state: + st.session_state.custom_ranges = None + st.session_state.custom_ranges = st.checkbox( + "Use Custom Sequence Ranges", + help="Check the box to use the specified custom sequence ranges as defined in the `str_markers.json` file.", + ) + if "sex" not in st.session_state: + st.session_state.sex = None + st.session_state.sex = st.checkbox( + "Include X- and Y-STRs", + help="Check the box to include X- and Y-STRs, otherwise leave unchecked.", + ) + if "kit" not in st.session_state: + st.session_state.kit = None + selected_kit = columns[1].selectbox( + "Library Preparation Kit", + options=["ForenSeq Signature Prep", "PowerSeq 46GY"], + help="Specify the library preparation kit used to generate the sequences.", + ) + kits = {"ForenSeq Signature Prep": "forenseq", "PowerSeq 46GY": "powerseq"} + st.session_state.kit = kits[selected_kit] + if "output" not in st.session_state: + st.session_state.output = None + st.session_state.output = columns[2].text_input( + "Output File Name", + "lusstr_output", + help="Please specify a name for the created files. It can only contain alphanumeric characters, underscores and hyphens. 
No spaces allowed.", + ) + if "nocombine" not in st.session_state: + st.session_state.nocombine = None + st.session_state.nocombine = st.checkbox( + "Do Not Combine Identical Sequences", + help="If using STRait Razor data, by default, identical sequences (after removing flanking sequences) are combined and reads are summed. Checking this will not combine identical sequences.", + ) + + +def str_filter_settings(): + st.subheader("Filter Settings") + columns = st.columns(5) + if "output_type" not in st.session_state: + st.session_state.output_type = None + st.session_state.output_type = { + "STRmix": "strmix", + "EuroForMix": "efm", + "MPSproto": "mpsproto", + }[ + columns[0].selectbox( + "Probabilistic Genotyping Software", + options=["STRmix", "EuroForMix", "MPSproto"], + help="Select which probabilistic genotyping software files to create", ) - if "kit" not in st.session_state: - st.session_state.kit = None - selected_kit = columns[1].selectbox( - "Library Preparation Kit", - options=["ForenSeq Signature Prep", "PowerSeq 46GY"], - help="Specify the library preparation kit used to generate the sequences.", + ] + if "profile_type" not in st.session_state: + st.session_state.profile_type = None + st.session_state.profile_type = {"Evidence": "evidence", "Reference": "reference"}[ + columns[1].selectbox( + "Profile Type", + options=["Evidence", "Reference"], + help="Select the file type (format) to create for the probabilistic genotyping software.", ) - kits = {"ForenSeq Signature Prep": "forenseq", "PowerSeq 46GY": "powerseq"} - st.session_state.kit = kits[selected_kit] - if "output" not in st.session_state: - st.session_state.output = None - st.session_state.output = columns[2].text_input( - "Output File Name", - "lusstr_output", - help="Please specify a name for the created files. It can only contain alphanumeric characters, underscores and hyphens. 
No spaces allowed.", + ] + if "data_type" not in st.session_state: + st.session_state.data_type = None + st.session_state.data_type = { + "Sequence": "ngs", + "CE allele": "ce", + "LUS+ allele": "lusplus", + }[ + columns[2].selectbox( + "Data Type", + options=["Sequence", "CE allele", "LUS+ allele"], + help="Select the allele type used to determine sequence type (belowAT, stutter or typed) and used in the final output file.", ) - if "nocombine" not in st.session_state: - st.session_state.nocombine = None - st.session_state.nocombine = st.checkbox( - "Do Not Combine Identical Sequences", - help="If using STRait Razor data, by default, identical sequences (after removing flanking sequences) are combined and reads are summed. Checking this will not combine identical sequences.", + ] + if "info" not in st.session_state: + st.session_state.info = None + st.session_state.info = st.checkbox( + "Create Allele Information File", + value=True, + help="Create file containing information about each sequence, including sequence type (belowAT, stutter or typed), stuttering sequence information and metrics involving stutter and noise.", + ) + if "separate" not in st.session_state: + st.session_state.separate = None + st.session_state.separate = st.checkbox( + "Create Separate Files for Samples", + help="If checked, will create individual files for samples; If unchecked, will create one file with all samples.", + ) + if "nofilters" not in st.session_state: + st.session_state.nofilters = None + st.session_state.nofilters = st.checkbox( + "Skip All Filtering Steps", + help="Filtering will not be performed but will still create EFM/MPSproto/STRmix output files containing all sequences.", + ) + if "strand" not in st.session_state: + st.session_state.strand = None + st.session_state.strand = {"UAS Orientation": "uas", "Forward Strand": "forward"}[ + columns[3].selectbox( + "Strand Orientation", + options=["Forward Strand", "UAS Orientation"], + help="Indicates the strand orientation in 
which to report the sequence in the final output table as some markers are reported in the UAS on the reverse strand. Selecting the UAS Orientation will report those markers on the reverse strand while the remaining will be reported on the forward strand. Selecting the Forward Strand will report all markers on the forward strand orientation. This applies to STRmix NGS only.", ) + ] - def str_filter_settings(self): - st.subheader("Filter Settings") - columns = st.columns(5) - if "output_type" not in st.session_state: - st.session_state.output_type = None - st.session_state.output_type = { - "STRmix": "strmix", - "EuroForMix": "efm", - "MPSproto": "mpsproto", - }[ - columns[0].selectbox( - "Probabilistic Genotyping Software", - options=["STRmix", "EuroForMix", "MPSproto"], - help="Select which probabilistic genotyping software files to create", - ) - ] - if "profile_type" not in st.session_state: - st.session_state.profile_type = None - st.session_state.profile_type = {"Evidence": "evidence", "Reference": "reference"}[ - columns[1].selectbox( - "Profile Type", - options=["Evidence", "Reference"], - help="Select the file type (format) to create for the probabilistic genotyping software.", - ) - ] - if "data_type" not in st.session_state: - st.session_state.data_type = None - st.session_state.data_type = { - "Sequence": "ngs", - "CE allele": "ce", - "LUS+ allele": "lusplus", - }[ - columns[2].selectbox( - "Data Type", - options=["Sequence", "CE allele", "LUS+ allele"], - help="Select the allele type used to determine sequence type (belowAT, stutter or typed) and used in the final output file.", - ) - ] - if "info" not in st.session_state: - st.session_state.info = None - st.session_state.info = st.checkbox( - "Create Allele Information File", - value=True, - help="Create file containing information about each sequence, including sequence type (belowAT, stutter or typed), stuttering sequence information and metrics involving stutter and noise.", - ) - if "separate" not 
in st.session_state: - st.session_state.separate = None - st.session_state.separate = st.checkbox( - "Create Separate Files for Samples", - help="If checked, will create individual files for samples; If unchecked, will create one file with all samples.", - ) - if "nofilters" not in st.session_state: - st.session_state.nofilters = None - st.session_state.nofilters = st.checkbox( - "Skip All Filtering Steps", - help="Filtering will not be performed but will still create EFM/MPSproto/STRmix output files containing all sequences.", - ) - if "strand" not in st.session_state: - st.session_state.strand = None - st.session_state.strand = {"UAS Orientation": "uas", "Forward Strand": "forward"}[ - columns[3].selectbox( - "Strand Orientation", - options=["Forward Strand", "UAS Orientation"], - help="Indicates the strand orientation in which to report the sequence in the final output table as some markers are reported in the UAS on the reverse strand. Selecting the UAS Orientation will report those markers on the reverse strand while the remaining will be reported on the forward strand. Selecting the Forward Strand will report all markers on the forward strand orientation. This applies to STRmix NGS only.", - ) - ] - def str_launch_workflow(self): - if st.button("Run lusSTR"): - if ( - st.session_state.analysis_software - and st.session_state.samp_input - and st.session_state.output - and st.session_state.wd_dirname - ): - if not validate_prefix(st.session_state.output): - st.warning( - "Please enter a valid output prefix. Only alphanumeric characters, underscore, and hyphen are allowed." 
- ) - st.stop() - with st.spinner("Processing your data..."): - config_data = { - "analysis_software": st.session_state.analysis_software, - "custom_ranges": st.session_state.custom_ranges, - "sex": st.session_state.sex, - "samp_input": st.session_state.samp_input, - "output": st.session_state.output, - "kit": st.session_state.kit, - "nocombine": st.session_state.nocombine, - "output_type": st.session_state.output_type, - "profile_type": st.session_state.profile_type, - "data_type": st.session_state.data_type, - "info": st.session_state.info, - "separate": st.session_state.separate, - "nofilters": st.session_state.nofilters, - "strand": st.session_state.strand, - } - generate_config_file(config_data, st.session_state.wd_dirname, "STR") - command = ["lusstr", "strs", "all"] - if self.wd_dirname: - command.extend(["-w", st.session_state.wd_dirname + "/"]) - try: - subprocess.run(command, check=True) - st.success( - "Config file generated and lusSTR executed successfully! Output files have been saved to your designated directory and labeled with your specified prefix." - ) - except subprocess.CalledProcessError as e: - st.error(f"Error: {e}") - st.info( - "Please make sure to check the 'How to Use' tab for common error resolutions." - ) - else: +def str_launch_workflow(): + if st.button("Run lusSTR"): + if ( + st.session_state.analysis_software + and st.session_state.samp_input_str + and st.session_state.output + and st.session_state.wd_dirname_str + ): + if not validate_prefix(st.session_state.output): st.warning( - "Please make sure to fill out all required fields (Analysis Software, Input Directory or File, Prefix for Output, and Specification of Working Directory) before submitting." + "Please enter a valid output prefix. Only alphanumeric characters, underscore, and hyphen are allowed." 
) + st.stop() + with st.spinner("Processing your data..."): + config_data = { + "analysis_software": st.session_state.analysis_software, + "custom_ranges": st.session_state.custom_ranges, + "sex": st.session_state.sex, + "samp_input_str": st.session_state.samp_input_str, + "output": st.session_state.output, + "kit": st.session_state.kit, + "nocombine": st.session_state.nocombine, + "output_type": st.session_state.output_type, + "profile_type": st.session_state.profile_type, + "data_type": st.session_state.data_type, + "info": st.session_state.info, + "separate": st.session_state.separate, + "nofilters": st.session_state.nofilters, + "strand": st.session_state.strand, + } + generate_config_file(config_data, st.session_state.wd_dirname_str, "STR") + command = ["lusstr", "strs", "all"] + if wd_dirname_str: + command.extend(["-w", st.session_state.wd_dirname_str + "/"]) + try: + subprocess.run(command, check=True) + st.success( + "Config file generated and lusSTR executed successfully! Output files have been saved to your designated directory and labeled with your specified prefix." + ) + except subprocess.CalledProcessError as e: + st.error(f"Error: {e}") + st.info( + "Please make sure to check the 'How to Use' tab for common error resolutions." + ) + else: + st.warning( + "Please make sure to fill out all required fields (Analysis Software, Input Directory or File, Prefix for Output, and Specification of Working Directory) before submitting." + ) - def str_footer(self): - st.write("---") - st.write( - "After running lusSTR, or if lusSTR has been run previously, the user may view and edit the individual STR marker plots and data." - ) - st.write( - "If lusSTR has been previously run, only the above ```Output Folder``` containing the run files needs to be specified. Other settings will be automatically loaded from the config.yaml file within the specified folder." 
- ) - if "interactive" not in st.session_state: - st.session_state.interactive = None - if st.button("See Individual Marker Plots & Data") or st.session_state.interactive: - st.session_state.interactive = True - create_settings() - if st.session_state.custom_ranges: - file = f"{st.session_state.wd_dirname}/{st.session_state.output}/{st.session_state.output}_custom_range" - else: - file = f"{self.wd_dirname}/{st.session_state.output}/{st.session_state.output}" - try: - sequence_info = pd.read_csv(f"{file}_sequence_info.csv") - interactive_setup(sequence_info, file) - except FileNotFoundError: - print( - f"{file}_sequence_info.csv not found. Please check output folder specification." - ) + +def str_footer(): + st.write("---") + st.write( + "After running lusSTR, or if lusSTR has been run previously, the user may view and edit the individual STR marker plots and data." + ) + st.write( + "If lusSTR has been previously run, only the above ```Output Folder``` containing the run files needs to be specified. Other settings will be automatically loaded from the config.yaml file within the specified folder." + ) + if "interactive" not in st.session_state: + st.session_state.interactive = None + if st.button("See Individual Marker Plots & Data") or st.session_state.interactive: + st.session_state.interactive = True + create_settings() + file = f"{st.session_state.wd_dirname_str}/{st.session_state.output}/{st.session_state.output}" + if st.session_state.custom_ranges: + file += "_custom_range" + try: + sequence_info = pd.read_csv(f"{file}_sequence_info.csv") + interactive_setup(sequence_info, file) + except FileNotFoundError: + print(f"{file}_sequence_info.csv not found. Please check output folder specification.") def df_on_change(locus): @@ -387,7 +383,9 @@ def interactive_setup(df1, file): locus = locus.replace("⚠️", "") if locus == "All Markers": if not flags_sample.empty: - st.write("⚠️ indicates potential problems with the marker. 
Examine the individual marker plots for more information.") + st.write( + "⚠️ indicates potential problems with the marker. Examine the individual marker plots for more information." + ) interactive_plots_allmarkers(sample_df, flags) else: locus_key = f"{sample}_{locus}" @@ -458,35 +456,35 @@ def interactive_setup(df1, file): now = datetime.now() dt = now.strftime("%m%d%Y_%H_%M_%S") del combined_df["Label"] - Path(f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}").mkdir( + Path(f"{st.session_state.wd_dirname_str}/{st.session_state.output}/edited_{dt}").mkdir( parents=True, exist_ok=True ) - outpath = f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}/" + outpath = f"{st.session_state.wd_dirname_str}/{st.session_state.output}/edited_{dt}/" marker_plots(combined_df, f"{st.session_state.output}_edited_{dt}", sex=False, wd=outpath) combined_df.to_csv( - f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}/" + f"{st.session_state.wd_dirname_str}/{st.session_state.output}/edited_{dt}/" f"{st.session_state.output}_sequence_info_edited_{dt}.csv", index=False, ) new_text = ( - f"Changes saved to {st.session_state.wd_dirname}/{st.session_state.output}" + f"Changes saved to {st.session_state.wd_dirname_str}/{st.session_state.output}" f"/edited_{dt}/{st.session_state.output}_sequence_info_edited_{dt}.csv" - f"New {st.session_state.output_type} files created in {st.session_state.wd_dirname}" + f"New {st.session_state.output_type} files created in {st.session_state.wd_dirname_str}" f"/{st.session_state.output}/edited_{dt}/ folder" ) remake_final_files(combined_df, outpath) ph.empty() with ph.container(): st.write( - f"New files and marker plots with edits saved to {st.session_state.wd_dirname}/" + f"New files and marker plots with edits saved to {st.session_state.wd_dirname_str}/" f"{st.session_state.output}/edited_{dt}/" ) def create_settings(): - if os.path.isfile(f"{st.session_state.wd_dirname}/config.yaml"): - 
st.write(f"Loading settings from {st.session_state.wd_dirname}/config.yaml") - with open(f"{st.session_state.wd_dirname}/config.yaml", "r") as file: + if os.path.isfile(f"{st.session_state.wd_dirname_str}/config.yaml"): + st.write(f"Loading settings from {st.session_state.wd_dirname_str}/config.yaml") + with open(f"{st.session_state.wd_dirname_str}/config.yaml", "r") as file: config_settings = yaml.safe_load(file) st.session_state.output = config_settings["output"] st.session_state.custom_ranges = config_settings["custom_ranges"] diff --git a/lusSTR/tests/__init__.py b/lusSTR/tests/__init__.py index c2c9c74..1768d73 100644 --- a/lusSTR/tests/__init__.py +++ b/lusSTR/tests/__init__.py @@ -15,6 +15,6 @@ def data_file(path): - pathparts = path.split('/') - relpath = os.path.join('tests', 'data', *pathparts) + pathparts = path.split("/") + relpath = os.path.join("tests", "data", *pathparts) return importlib.resources.files("lusSTR") / relpath