Skip to content

Commit

Permalink
checks for chrom names (jodyphelan/TBProfiler/issues/394)
Browse files Browse the repository at this point in the history
  • Loading branch information
jodyphelan committed Sep 30, 2024
1 parent 96779c4 commit fecef55
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
5 changes: 3 additions & 2 deletions pathogenprofiler/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .models import Variant, DrVariant, Gene, DrGene, SpeciesPrediction, Species, BarcodeResult
from .mutation_db import MutationDB
from .vcf import Vcf
from .sanity import check_bam_for_rg, check_vcf_chrom_match
from .sanity import check_bam_for_rg, check_vcf_chrom_match, check_bam_chrom_match

def get_variant_filters(args):
filters = {}
Expand Down Expand Up @@ -203,7 +203,7 @@ def run_profiler(args) -> List[Union[Variant,DrVariant,Gene,DrGene]]:
tmp_vcf_file = f"{args.files_prefix}.tmp.vcf.gz"
run_cmd(f"bcftools view {args.vcf} | modify_lofreq_vcf.py --sample {args.prefix} | bcftools view -Oz -o {tmp_vcf_file}")
args.vcf = tmp_vcf_file
# check_vcf_chrom_match(args.vcf,args.conf["ref"])
check_vcf_chrom_match(args.vcf,args.conf["ref"])
annotated_variants = vcf_profiler(args)

return annotated_variants
Expand Down Expand Up @@ -262,6 +262,7 @@ def get_bam_file(args):
bam_file = bam_obj.bam_file
else:
check_bam_for_rg(args.bam)
check_bam_chrom_match(args.bam,args.conf["ref"])
bam_file = args.bam

return bam_file
Expand Down
10 changes: 10 additions & 0 deletions pathogenprofiler/sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,13 @@ def check_vcf_chrom_match(vcf_file, ref_file) -> None:
for chrom in vcf.header.contigs:
if chrom not in ref.references:
raise Exception(f"Chromosome {chrom} in VCF file {vcf_file} not found in reference file {ref_file}.")

def check_bam_chrom_match(bam_file, ref_file) -> None:
    """
    Check if the chromosomes in the BAM file match the reference file.

    Every sequence name listed in the BAM header must also be present in
    the reference FASTA; the first name that is missing aborts the check.

    Args:
        bam_file: Path to the BAM file, opened with pysam in "rb" mode.
        ref_file: Path to the reference FASTA file, opened with pysam.

    Raises:
        Exception: If a sequence name from the BAM file is not found among
            the reference file's sequence names.
    """
    # Open both files as context managers so the underlying handles are
    # released even when the check fails and an exception propagates.
    with pysam.AlignmentFile(bam_file, "rb") as bam, pysam.FastaFile(ref_file) as ref:
        # Build the lookup once instead of re-reading and linearly scanning
        # the reference name list for every BAM contig.
        ref_names = set(ref.references)
        for chrom in bam.references:
            if chrom not in ref_names:
                raise Exception(f"Chromosome {chrom} in BAM file {bam_file} not found in reference file {ref_file}.")

0 comments on commit fecef55

Please sign in to comment.