Skip to content

Commit

Permalink
Expand test-bcf-sr.c capabilities
Browse files Browse the repository at this point in the history
Add -O,--output-fmt option so it can write vcf or bcf as well as
its original summary format.

Add -o,--output option so it's possible to write to a file without
shell redirection.

Add --args option so input files can be listed directly on the
command line instead of via a fofn, to make basic tests easier.

Add -r,--regions and -t,--targets options, which behave the
same as the equivalents in `bcftools view`.

Add the --no-index option to the usage text.

Simplify writing the original format.  Everything can be sent
directly to the output file without going via a kstring.  The
output writing parts are also moved into separate functions to
keep main() from getting too big.

Add a few extra error checks.

Call exit(EXIT_FAILURE) on failure, not exit(-1).  Make the -h
option return success.
  • Loading branch information
daviesrob authored and jkbonfield committed Jun 27, 2023
1 parent 3c36c9b commit 28a8082
Showing 1 changed file with 159 additions and 42 deletions.
201 changes: 159 additions & 42 deletions test/test-bcf-sr.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,46 +28,128 @@

#include <config.h>

#include <stdlib.h>
#include <getopt.h>
#include <stdio.h>
#include <stdarg.h>
#include <inttypes.h>
#include <strings.h>
#include <errno.h>

#include "../htslib/synced_bcf_reader.h"
#include "../htslib/hts.h"
#include "../htslib/vcf.h"

void error(const char *format, ...)
{
va_list ap;
va_start(ap, format);
vfprintf(stderr, format, ap);
va_end(ap);
exit(-1);
exit(EXIT_FAILURE);
}

void usage(void)
void usage(int exit_code)
{
fprintf(stderr, "Usage: test-bcf-sr [OPTIONS] vcf-list.txt\n");
fprintf(stderr, " test-bcf-sr [OPTIONS] -args file1.bcf [...]\n");
fprintf(stderr, "Options:\n");
fprintf(stderr, " --args pass filenames directly in argument list\n");
fprintf(stderr, " --no-index allow streaming\n");
fprintf(stderr, " -o, --output <file> output file (stdout if not set)\n");
fprintf(stderr, " -O, --output-fmt <fmt> fmt: vcf,bcf,summary\n");
fprintf(stderr, " -p, --pair <logic[+ref]> logic: snps,indels,both,snps+ref,indels+ref,both+ref,exact,some,all\n");
fprintf(stderr, " -r, --regions <reg_list> comma-separated list of regions\n");
fprintf(stderr, " -t, --targets <reg_list> comma-separated list of targets\n");
fprintf(stderr, "\n");
exit(-1);
exit(exit_code);
}

void write_summary_format(bcf_srs_t *sr, FILE *out)
{
int n, i, j;
while ((n = bcf_sr_next_line(sr)) > 0) {
for (i=0; i<sr->nreaders; i++)
{
if ( !bcf_sr_has_line(sr,i) ) continue;
bcf1_t *rec = bcf_sr_get_line(sr, i);
if (!rec) error("bcf_sr_get_line() unexpectedly returned NULL\n");
fprintf(out, "%s:%"PRIhts_pos,
bcf_seqname_safe(bcf_sr_get_header(sr,i),rec),rec->pos+1);
break;
}

for (i=0; i<sr->nreaders; i++)
{
fprintf(out, "\t");

if ( !bcf_sr_has_line(sr,i) )
{
fprintf(out, "%s","-");
continue;
}

bcf1_t *rec = bcf_sr_get_line(sr, i);
if (!rec) error("bcf_sr_get_line() unexpectedly returned NULL\n");
fprintf(out, "%s", rec->n_allele > 1 ? rec->d.allele[1] : ".");
for (j=2; j<rec->n_allele; j++)
{
fprintf(out, ",%s", rec->d.allele[j]);
}
}
fprintf(out, "\n");
}
}

void write_vcf_bcf_format(bcf_srs_t *sr, bcf_hdr_t *hdr, vcfFile *vcf_out,
const char *fmt_type)
{
int i, n;
if (bcf_hdr_write(vcf_out, hdr) != 0)
error("Couldn't write %s header\n", fmt_type);

while ((n = bcf_sr_next_line(sr)) > 0) {
for (i=0; i<sr->nreaders; i++)
{
if ( !bcf_sr_has_line(sr,i) ) continue;
bcf1_t *rec = bcf_sr_get_line(sr, i);
if (!rec) error("bcf_sr_get_line() unexpectedly returned NULL\n");
if (vcf_write(vcf_out, hdr, rec) < 0)
error("vcf_write() failed\n");
}
}
}

int main(int argc, char *argv[])
{
static struct option loptions[] =
{
{"help",no_argument,NULL,'h'},
{"output-fmt",required_argument,NULL,'O'},
{"pair",required_argument,NULL,'p'},
{"regions",required_argument,NULL,'r'},
{"targets",required_argument,NULL,'t'},
{"no-index",no_argument,NULL,1000},
{"args",no_argument,NULL,1001},
{NULL,0,NULL,0}
};

int c, pair = 0, use_index = 1;
while ((c = getopt_long(argc, argv, "p:h", loptions, NULL)) >= 0)
int c, pair = 0, use_index = 1, use_fofn = 1;
enum htsExactFormat out_fmt = text_format; // for original pos + alleles
const char *out_fn = NULL, *regions = NULL, *targets = NULL;
while ((c = getopt_long(argc, argv, "o:O:p:r:t:h", loptions, NULL)) >= 0)
{
switch (c)
{
case 'o':
out_fn = optarg;
break;
case 'O':
if (!strcasecmp(optarg, "vcf")) out_fmt = vcf;
else if (!strcasecmp(optarg, "bcf")) out_fmt = bcf;
else if (!strcasecmp(optarg, "summary")) out_fmt = text_format;
else error("Unknown output format \"%s\"\n", optarg);
break;
case 'p':
if ( !strcmp(optarg,"snps") ) pair |= BCF_SR_PAIR_SNPS;
else if ( !strcmp(optarg,"snp+ref") ) pair |= BCF_SR_PAIR_SNPS|BCF_SR_PAIR_SNP_REF;
Expand All @@ -83,68 +165,103 @@ int main(int argc, char *argv[])
else if ( !strcmp(optarg,"exact") ) pair = BCF_SR_PAIR_EXACT;
else error("The --pair logic \"%s\" not recognised.\n", optarg);
break;
case 'r':
regions = optarg;
break;
case 't':
targets = optarg;
break;
case 1000:
use_index = 0;
break;
default: usage();
case 1001:
use_fofn = 0;
break;
case 'h':
usage(EXIT_SUCCESS);
default: usage(EXIT_FAILURE);
}
}
if ( !pair ) pair = BCF_SR_PAIR_EXACT;
if ( optind == argc ) usage();
if ( optind == argc ) usage(EXIT_FAILURE);

int i, j, n, nvcf;
char **vcf = hts_readlist(argv[optind], 1, &nvcf);
if ( !vcf ) error("Could not parse %s\n", argv[optind]);
int i, nvcf;
char **vcfs = NULL;
if (use_fofn) {
vcfs = hts_readlist(argv[optind], 1, &nvcf);
if ( !vcfs ) error("Could not parse %s\n", argv[optind]);
} else {
vcfs = &argv[optind];
nvcf = argc - optind;
}

bcf_srs_t *sr = bcf_sr_init();
if (!sr) error("bcf_sr_init() failed\n");
bcf_sr_set_opt(sr, BCF_SR_PAIR_LOGIC, pair);
if (use_index) {
bcf_sr_set_opt(sr, BCF_SR_REQUIRE_IDX);
} else {
bcf_sr_set_opt(sr, BCF_SR_ALLOW_NO_IDX);
}
for (i=0; i<nvcf; i++)
if ( !bcf_sr_add_reader(sr,vcf[i]) ) error("Failed to open %s: %s\n", vcf[i],bcf_sr_strerror(sr->errnum));

kstring_t str = {0,0,0};
while ( (n=bcf_sr_next_line(sr)) )
if (regions)
{
for (i=0; i<sr->nreaders; i++)
if (bcf_sr_set_regions(sr, regions, 0) != 0)
error("Failed to set regions\n");
}

if (targets)
{
if (bcf_sr_set_targets(sr, targets, 0, 0) != 0)
error("Failed to set targets\n");
}

for (i=0; i<nvcf; i++)
if ( !bcf_sr_add_reader(sr,vcfs[i]) ) error("Failed to open %s: %s\n", vcfs[i],bcf_sr_strerror(sr->errnum));

if (!sr->readers || sr->nreaders < 1)
error("No readers set, even though one was added\n");

if (out_fmt == text_format) {
FILE *out = stdout;
if (out_fn)
{
if ( !bcf_sr_has_line(sr,i) ) continue;
bcf1_t *rec = bcf_sr_get_line(sr, i);
printf("%s:%"PRIhts_pos, bcf_seqname_safe(bcf_sr_get_header(sr,i),rec),rec->pos+1);
break;
out = fopen(out_fn, "w");
if (!out) error("Couldn't open \"%s\" for writing: %s\n",
out_fn, strerror(errno));
}

for (i=0; i<sr->nreaders; i++)
write_summary_format(sr, out);
if (out_fn)
{
printf("\t");
if (fclose(out) != 0)
error("Error on closing %s : %s\n",
out_fn, strerror(errno));
}
} else {
const char *fmt_type = out_fmt == vcf ? "VCF" : "BCF";

if ( !bcf_sr_has_line(sr,i) )
{
printf("%s","-");
continue;
}
bcf_hdr_t *hdr = bcf_sr_get_header(sr, 0);
if (!hdr) error("%s output, but don't have a header\n", fmt_type);

str.l = 0;
bcf1_t *rec = bcf_sr_get_line(sr, i);
kputs(rec->n_allele > 1 ? rec->d.allele[1] : ".", &str);
for (j=2; j<rec->n_allele; j++)
{
kputc(',', &str);
kputs(rec->d.allele[j], &str);
}
printf("%s",str.s);
}
printf("\n");
if (!out_fn) { out_fn = "-"; }
vcfFile *vcf_out = vcf_open(out_fn, out_fmt == vcf ? "w" : "wb");
if (!vcf_out) error("Couldn't open \"%s\" for writing: %s\n",
out_fn, strerror(errno));
write_vcf_bcf_format(sr, hdr, vcf_out, fmt_type);
if (vcf_close(vcf_out) != 0)
error("Error on closing \"%s\"\n", out_fn);
}

free(str.s);
if (sr->errnum) error("Synced reader error: %s\n",
bcf_sr_strerror(sr->errnum));

bcf_sr_destroy(sr);
for (i=0; i<nvcf; i++)
free(vcf[i]);
free(vcf);
if (use_fofn)
{
for (i=0; i<nvcf; i++)
free(vcfs[i]);
free(vcfs);
}

return 0;
}
Expand Down

0 comments on commit 28a8082

Please sign in to comment.