Skip to content

Commit

Permalink
Add min_identity and min_coverage params
Browse files Browse the repository at this point in the history
  • Loading branch information
dfornika committed Oct 18, 2022
1 parent 77c7c17 commit dfe103a
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 6 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,14 @@ kma index -i <your_scheme.fa> -o <your_scheme>
```
nextflow run BCCDC-PHL/kma-cgmlst \
--fastq_input </path/to/fastqs> \
[--min_identity <min_percent_identity>] \
[--min_coverage <min_percent_coverage>] \
--scheme </path/to/cgmlst_scheme> \
--outdir </path/to/output_dir>
```

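The optional `--min_identity` and `--min_coverage` flags set the minimum percent identity and minimum percent coverage that the best-matching allele must reach for it to be called. Both default to 100% when the flags are omitted; if the best match for a locus falls below either threshold, that locus is reported as `-`.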

Alternatively, a `samplesheet.csv` file can be provided, with fields: `ID`,`R1`,`R2`:

```
Expand Down Expand Up @@ -55,4 +59,4 @@ nextflow run BCCDC-PHL/kma-cgmlst \
--scheme </path/to/cgmlst_scheme> \
--versioned_outdir \
--outdir </path/to/output_dir>
```
```
8 changes: 5 additions & 3 deletions bin/kma_result_to_mlst.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,15 @@ def main(args):
str(alleles[best_allele]['depth']),
]))

if alleles[best_allele]['template_identity'] == 100 and alleles[best_allele]['template_coverage'] == 100:
if alleles[best_allele]['template_identity'] >= args.min_identity and alleles[best_allele]['template_coverage'] >= args.min_coverage:
mlst_output[alleles[best_allele]['locus_id']] = alleles[best_allele]['allele_id']
else:
mlst_output[alleles[best_allele]['locus_id']] = '-'
else:
mlst_output[locus] = '-'


with open(args.o, 'w') as f:
with open(args.output, 'w') as f:
fieldnames = ['sample_id'] + all_loci
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
Expand All @@ -128,6 +128,8 @@ def main(args):
parser.add_argument("--locus-allele-delimiter", help="Delimiter separating locus id from allele id", default='_')
parser.add_argument("-s", "--sample-id", help="Sample ID", default='unknown')
parser.add_argument("-a", "--alleles", help="List of all alleles")
parser.add_argument("-o")
parser.add_argument("-i", "--min-identity", type=float, default=100.0, help="Minimum identity to consider an allele match")
parser.add_argument("-c", "--min-coverage", type=float, default=100.0, help="Minimum coverage to consider an allele match")
parser.add_argument("-o", "--output", help="Output")
args = parser.parse_args()
main(args)
2 changes: 2 additions & 0 deletions modules/kma_result_to_mlst.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ process kma_result_to_mlst {
--alleles ${scheme}.name \
--sample-id "${sample_id}" \
--locus-allele-delimiter "_" \
--min-identity ${params.min_identity} \
--min-coverage ${params.min_coverage} \
-o ${sample_id}_cgmlst.csv \
> ${sample_id}_locus_qc.csv
"""
Expand Down
9 changes: 7 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
manifest {
author = 'Dan Fornika <[email protected]>, Sherrie Wang <[email protected]>'
name = 'BCCDC-PHL/kma-cgmlst'
version = '0.1.0'
version = '0.1.1'
description = 'BCCDC-PHL kma cgMLST'
mainScript = 'main.nf'
nextflowVersion = '>=20.01.0'
Expand All @@ -16,6 +16,8 @@ params {
versioned_outdir = false
pipeline_short_name = parsePipelineName(manifest.toMap().get('name'))
pipeline_minor_version = parseMinorVersion(manifest.toMap().get('version'))
min_identity = 100.0
min_coverage = 100.0
}

def makeFastqSearchPath ( illumina_suffixes, fastq_exts ) {
Expand Down Expand Up @@ -48,7 +50,10 @@ profiles {
}

process {
withName: kma_align {
withName: fastp {
cpus = 4
}
withName: kma_align {
cpus = 8
}
}

0 comments on commit dfe103a

Please sign in to comment.