Skip to content

Commit

Permalink
Fix BCF/VCF on-the-fly indexing issues
Browse files Browse the repository at this point in the history
* Switch from hts_idx_push() to bgzf_idx_push() for on-the-fly
  indexing of BCF and VCF.bgz files.  The latter function is
  needed to record the correct offsets when using multi-threaded
  BGZF compression.

  Fixes samtools/bcftools#1985

* Only allow indexing of BGZF-compressed files.  It's necessary to
  enforce this as on-the-fly indexing assumes that the file
  pointer is in htsFile::fp.bgzf, but uncompressed VCF uses
  htsFile::fp.hfile.
  • Loading branch information
daviesrob authored and whitwham committed Sep 22, 2023
1 parent f94527b commit 7994291
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions vcf.c
Original file line number Diff line number Diff line change
Expand Up @@ -2236,7 +2236,8 @@ int bcf_write(htsFile *hfp, bcf_hdr_t *h, bcf1_t *v)
if ( bgzf_write(fp, v->indiv.s, v->indiv.l) != v->indiv.l ) return -1;

if (hfp->idx) {
-if (hts_idx_push(hfp->idx, v->rid, v->pos, v->pos + v->rlen, bgzf_tell(fp), 1) < 0)
+if (bgzf_idx_push(fp, hfp->idx, v->rid, v->pos, v->pos + v->rlen,
+bgzf_tell(fp), 1) < 0)
return -1;
}

Expand Down Expand Up @@ -4184,19 +4185,19 @@ int vcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v)
if ( fp->format.compression!=no_compression ) {
if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0)
return -1;
-if (fp->idx)
-hts_idx_amend_last(fp->idx, bgzf_tell(fp->fp.bgzf));
ret = bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l);
} else {
ret = hwrite(fp->fp.hfile, fp->line.s, fp->line.l);
}

-if (fp->idx) {
+if (fp->idx && fp->format.compression == bgzf) {
int tid;
if ((tid = hts_idx_tbi_name(fp->idx, v->rid, bcf_seqname_safe(h, v))) < 0)
return -1;

-if (hts_idx_push(fp->idx, tid, v->pos, v->pos + v->rlen, bgzf_tell(fp->fp.bgzf), 1) < 0)
+if (bgzf_idx_push(fp->fp.bgzf, fp->idx,
+tid, v->pos, v->pos + v->rlen,
+bgzf_tell(fp->fp.bgzf), 1) < 0)
return -1;
}

Expand Down Expand Up @@ -4385,6 +4386,11 @@ static int vcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fn
int bcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fnidx) {
int n_lvls, nids = 0;

+if (fp->format.compression != bgzf) {
+hts_log_error("Indexing is only supported on BGZF-compressed files");
+return -3; // Matches no-compression return for bcf_index_build3()
+}
+
if (fp->format.format == vcf)
return vcf_idx_init(fp, h, min_shift, fnidx);

Expand Down

0 comments on commit 7994291

Please sign in to comment.