From 799429155aef0debfe302c74847102b2971d40fc Mon Sep 17 00:00:00 2001 From: Rob Davies Date: Mon, 18 Sep 2023 16:27:21 +0100 Subject: [PATCH] Fix BCF/VCF on-the-fly indexing issues * Switch from hts_idx_push() to bgzf_idx_push() for on-the-fly indexing of BCF and VCF.bgz files. The latter function is needed to record the correct offsets when using multi-threaded BGZF compression. Fixes samtools/bcftools#1985 * Only allow indexing of BGZF-compressed files. It's necessary to enforce this as on-the-fly indexing assumes that the file pointer is in htsFile::fp.bgzf, but uncompressed VCF uses htsFile::fp.hfile. --- vcf.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/vcf.c b/vcf.c index 3bc7ef607..301d818cf 100644 --- a/vcf.c +++ b/vcf.c @@ -2236,7 +2236,8 @@ int bcf_write(htsFile *hfp, bcf_hdr_t *h, bcf1_t *v) if ( bgzf_write(fp, v->indiv.s, v->indiv.l) != v->indiv.l ) return -1; if (hfp->idx) { - if (hts_idx_push(hfp->idx, v->rid, v->pos, v->pos + v->rlen, bgzf_tell(fp), 1) < 0) + if (bgzf_idx_push(fp, hfp->idx, v->rid, v->pos, v->pos + v->rlen, + bgzf_tell(fp), 1) < 0) return -1; } @@ -4184,19 +4185,19 @@ int vcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) if ( fp->format.compression!=no_compression ) { if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0) return -1; - if (fp->idx) - hts_idx_amend_last(fp->idx, bgzf_tell(fp->fp.bgzf)); ret = bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l); } else { ret = hwrite(fp->fp.hfile, fp->line.s, fp->line.l); } - if (fp->idx) { + if (fp->idx && fp->format.compression == bgzf) { int tid; if ((tid = hts_idx_tbi_name(fp->idx, v->rid, bcf_seqname_safe(h, v))) < 0) return -1; - if (hts_idx_push(fp->idx, tid, v->pos, v->pos + v->rlen, bgzf_tell(fp->fp.bgzf), 1) < 0) + if (bgzf_idx_push(fp->fp.bgzf, fp->idx, + tid, v->pos, v->pos + v->rlen, + bgzf_tell(fp->fp.bgzf), 1) < 0) return -1; } @@ -4385,6 +4386,11 @@ static int vcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fn int bcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fnidx) { int n_lvls, nids = 0; + if (fp->format.compression != bgzf) { + hts_log_error("Indexing is only supported on BGZF-compressed files"); + return -3; // Matches no-compression return for bcf_index_build3() + } + if (fp->format.format == vcf) return vcf_idx_init(fp, h, min_shift, fnidx);