Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Initial implementation of Molecular bar codes handling using AGeNT #462

Merged
merged 5 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions snappy_pipeline/workflows/ngs_mapping/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,28 @@ rule ngs_mapping_bwa_mem2_run:
wf.wrapper_path("bwa_mem2")


# Run Molecular Barcodes meta-tool --------------------------------------------


# Rule for the "mbcs" sub-step, action "run". Every directive is resolved
# through the workflow object `wf` using the ("mbcs", "run") pair, so the rule
# itself carries no hard-coded paths or resource values.
rule ngs_mapping_mbcs_run:
input:
wf.get_input_files("mbcs", "run"),
output:
# Unpacked with ** so the returned mapping becomes named outputs.
**wf.get_output_files("mbcs", "run"),
threads: wf.get_resource("mbcs", "run", "threads")
resources:
time=wf.get_resource("mbcs", "run", "time"),
memory=wf.get_resource("mbcs", "run", "memory"),
partition=wf.get_resource("mbcs", "run", "partition"),
tmpdir=wf.get_resource("mbcs", "run", "tmpdir"),
params:
# Dispatches to the "get_args" method of the "mbcs" sub-step for action "run".
args=wf.substep_dispatch("mbcs", "get_args", "run"),
log:
# Unpacked with ** so the returned mapping becomes named log files.
**wf.get_log_file("mbcs", "run"),
wrapper:
wf.wrapper_path("mbcs")


# Run STAR --------------------------------------------------------------------


Expand Down
85 changes: 84 additions & 1 deletion snappy_pipeline/workflows/ngs_mapping/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@
EXT_NAMES = ("bam", "bai", "bam_md5", "bai_md5")

#: Available read mappers for (short/Illumina) DNA-seq data
READ_MAPPERS_DNA = ("bwa", "bwa_mem2")
READ_MAPPERS_DNA = ("bwa", "bwa_mem2", "mbcs")

#: Available read mappers for (short/Illumina) RNA-seq data
READ_MAPPERS_RNA = ("star",)
Expand Down Expand Up @@ -389,6 +389,21 @@
trim_adapters: false
mask_duplicates: true
split_as_secondary: true # -M flag
# Configuration for mbcs (meta sub-step to deal with Molecular Barcodes & base quality recalibration)
mbcs:
mapping_tool: REQUIRED # Either bwa or bwa_mem2. The indices & other parameters are taken from mapper config
mbc_tool: agent # Only agent currently implemented
agent:
prepare:
path: REQUIRED
lib_prep_type: REQUIRED # One of "halo" (HaloPlex), "hs" (HaloPlexHS), "xt" (SureSelect XT, XT2, XT HS), "v2" (SureSelect XT HS2) & "qxt" (SureSelect QXT)
extra_args: [] # Consider "-polyG 8" for NovaSeq data & "-minFractionRead 50" for 100 cycles data
mark_duplicates:
path: REQUIRED
consensus_mode: REQUIRED # One of "SINGLE", "HYBRID", "DUPLEX"
input_filter_args: []
consensus_filter_args: []
extra_args: []
# Configuration for STAR
star:
path_index: REQUIRED # Required if listed in ngs_mapping.tools.rna; otherwise, can be removed.
Expand Down Expand Up @@ -760,6 +775,73 @@ def check_config(self):
)


class MBCsStepPart(ReadMappingStepPart):
    """Support for performing NGS alignment on MBC (molecular barcode) data.

    This is a meta sub-step: the actual read mapper is taken from the
    ``mbcs.mapping_tool`` configuration entry and must be one of the other
    DNA read mappers listed in ``READ_MAPPERS_DNA``.
    """

    name = "mbcs"
    tool_category = "dna"

    #: Accepted values for ``mbcs.agent.prepare.lib_prep_type``.
    LIB_PREP_TYPES = ("halo", "hs", "xt", "v2", "qxt")
    #: Accepted values for ``mbcs.agent.mark_duplicates.consensus_mode``.
    CONSENSUS_MODES = ("SINGLE", "HYBRID", "DUPLEX")

    def get_resource_usage(self, action):
        """Get Resource Usage

        :param action: Action (i.e., step) in the workflow, example: 'run'.
        :type action: str

        :return: Returns ResourceUsage for step.

        :raises UnsupportedActionException: if action not in class defined list of valid actions.
        """
        self._validate_action(action)
        return ResourceUsage(
            threads=1,
            time="24:00:00",
            memory="4G",
        )

    def check_config(self):
        """Check parameters in configuration.

        Nothing is checked when ``mbcs`` is not listed in ``tools.dna``.
        Otherwise the method verifies, in this order, that:

        - ``mbcs.mapping_tool`` names a DNA read mapper other than ``mbcs``
          itself, and that this mapper's own configuration is valid;
        - the paths configured for ``agent.prepare`` and
          ``agent.mark_duplicates`` exist on disk;
        - ``agent.prepare.lib_prep_type`` is one of ``LIB_PREP_TYPES``;
        - ``agent.mark_duplicates.consensus_mode`` is one of ``CONSENSUS_MODES``.

        :raises InvalidConfiguration: if any of the checks above fails.
        """
        # Check if tool is at all included in workflow
        if self.__class__.name not in self.config["tools"]["dna"]:
            return  # mbcs not run, don't check configuration  # pragma: no cover

        # Check mapper. An explicit raise is used instead of ``assert`` so the
        # validation is not stripped when Python runs with ``-O`` and so the
        # failure surfaces as a configuration error like every other check.
        mapper = self.config["mbcs"]["mapping_tool"]
        if mapper == "mbcs" or mapper not in READ_MAPPERS_DNA:
            raise InvalidConfiguration(f'Unknown mapper "{mapper}"')
        self.parent.sub_steps[mapper].check_config()

        # Check trimmer & creak paths
        path = self.config["mbcs"]["agent"]["prepare"]["path"]
        if not os.path.exists(path):
            raise InvalidConfiguration(
                f"Expected agent's trimmer input path {path} does not exist!"
            )
        path = self.config["mbcs"]["agent"]["mark_duplicates"]["path"]
        if not os.path.exists(path):
            raise InvalidConfiguration(f"Expected agent's creak input path {path} does not exist!")

        # Check mandatory options
        option = self.config["mbcs"]["agent"]["prepare"]["lib_prep_type"]
        if option not in self.__class__.LIB_PREP_TYPES:
            options = '", "'.join(self.__class__.LIB_PREP_TYPES)
            raise InvalidConfiguration(
                f'Unkown library preparation type "{option}", valid options are "{options}"'
            )
        option = self.config["mbcs"]["agent"]["mark_duplicates"]["consensus_mode"]
        if option not in self.__class__.CONSENSUS_MODES:
            options = '", "'.join(self.__class__.CONSENSUS_MODES)
            raise InvalidConfiguration(
                f'Unkown consensus mode "{option}", valid options are "{options}"'
            )


class StarStepPart(ReadMappingStepPart):
"""Support for performing NGS alignment using STAR"""

Expand Down Expand Up @@ -1362,6 +1444,7 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir)
(
BwaStepPart,
BwaMem2StepPart,
MBCsStepPart,
ExternalStepPart,
LinkInStep,
Minimap2StepPart,
Expand Down
13 changes: 13 additions & 0 deletions snappy_wrappers/wrappers/mbcs/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Conda environment for the "mbcs" wrapper.
channels:
- conda-forge
- bioconda
dependencies:
- cffi
- pandas
- numpy
- snakemake-minimal
# Pinned to major version 17.
# NOTE(review): presumably the Java runtime for the AGeNT tools — confirm.
- openjdk =17
- seqtk
- samtools
# Pinned to exactly 2.2.1.
- bwa-mem2 ==2.2.1
- gatk4
Loading
Loading