def check_bam_chrom_match(bam_file, ref_file) -> None:
    """
    Check if the chromosomes in the BAM file match the reference file.

    Every sequence name reported by the BAM file's ``references`` must also
    appear in the reference FASTA's ``references``. The FASTA may contain
    additional sequences; only names missing from the FASTA are an error.

    Arguments
    ---------
    bam_file
        Path of the BAM file, opened with pysam in "rb" mode.
    ref_file
        Path of the reference FASTA file, opened with pysam.FastaFile.

    Raises
    ------
    Exception
        On the first BAM sequence name that is not present in the reference.
    """
    # Open both files as context managers so the underlying handles are
    # released on the success path and when the mismatch exception is raised.
    with pysam.AlignmentFile(bam_file, "rb") as bam, pysam.FastaFile(ref_file) as ref:
        # Build the lookup once instead of scanning the sequence per BAM contig.
        ref_chroms = set(ref.references)
        for chrom in bam.references:
            if chrom not in ref_chroms:
                raise Exception(f"Chromosome {chrom} in BAM file {bam_file} not found in reference file {ref_file}.")