Skip to content

Commit

Permalink
moving functions from VCFWriter to VCFHeader as appropriate as well a…
Browse files Browse the repository at this point in the history
…s some unittests, starting work on #96, #94, #69
  • Loading branch information
charlesgregory committed Sep 1, 2021
1 parent b4e5e01 commit cb0282d
Show file tree
Hide file tree
Showing 2 changed files with 221 additions and 100 deletions.
202 changes: 202 additions & 0 deletions source/dhtslib/vcf/header.d
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
module dhtslib.vcf.header;

import std.datetime;
import std.string: fromStringz, toStringz;
import std.format: format;
import std.traits : isArray, isIntegral, isSomeString;
import std.conv: to, ConvException;

import htslib.vcf;
import htslib.hts_log;

/** Wrapper around bcf_hdr_t
Expand Down Expand Up @@ -35,6 +40,11 @@ struct VCFHeader
assert(this.hdr != null);
}

/// copy this header
auto dup(){
return VCFHeader(bcf_hdr_dup(this.hdr));
}

/// List of contigs in the header
@property string[] sequences()
{
Expand All @@ -61,4 +71,196 @@ struct VCFHeader
pragma(inline, true)
@property int nsamples() { return bcf_hdr_nsamples(this.hdr); }

// TODO
/// copy header lines from a template without overwiting existing lines
void copyHeaderLines(bcf_hdr_t *other)
{
assert(this.hdr != null);
assert(0);
// bcf_hdr_t *bcf_hdr_merge(bcf_hdr_t *dst, const(bcf_hdr_t) *src);
}

/// Add sample to this VCF
/// * int bcf_hdr_add_sample(bcf_hdr_t *hdr, const(char) *sample);
int addSample(string name)
in { assert(name != ""); }
do
{
assert(this.hdr != null);

bcf_hdr_add_sample(this.hdr, toStringz(name));

// AARRRRGGGHHHH
// https://github.com/samtools/htslib/issues/767
bcf_hdr_sync(this.hdr);

return 0;
}

/// Add a new header line
int addHeaderLineKV(string key, string value)
{
// TODO check that key is not INFO, FILTER, FORMAT (or contig?)
string line = format("##%s=%s", key, value);

return bcf_hdr_append(this.hdr, toStringz(line));
}
/// Add a new header line -- must be formatted ##key=value
int addHeaderLineRaw(string line)
{
assert(this.hdr != null);
// int bcf_hdr_append(bcf_hdr_t *h, const(char) *line);
const auto ret = bcf_hdr_append(this.hdr, toStringz(line));
bcf_hdr_sync(this.hdr);
return ret;
}
/// Add a filedate= headerline, which is not called out specifically in the spec,
/// but appears in the spec's example files. We could consider allowing a param here.
int addFiledate()
{
return addHeaderLineKV("filedate", (cast(Date) Clock.currTime()).toISOString );
}

/** Add INFO (§1.2.2) or FORMAT (§1.2.4) tag
The INFO tag describes row-specific keys used in the INFO column;
The FORMAT tag describes sample-specific keys used in the last, and optional, genotype column.
Template parameter: string; must be INFO or FORMAT
The first four parameters are required; NUMBER and TYPE have specific allowable values.
source and version are optional, but recommended (for INFO only).
* id: ID tag
* number: NUMBER tag; here a string because it can also take special values {A,R,G,.} (see §1.2.2)
* type: Integer, Float, Flag, Character, and String
* description: Text description; will be double quoted
* source: Annotation source (eg dbSNP)
* version: Annotation version (eg 142)
*/
void addTag(string tagType, T)( string id,
T number,
string type,
string description,
string source="",
string _version="")
if((tagType == "INFO" || tagType == "FORMAT") && (isIntegral!T || isSomeString!T))
{
string line; // we'll suffix with \0, don't worry

// check ID
if (id == "") {
hts_log_error(__FUNCTION__, "no ID");
return;
}

// check Number is either a special code {A,R,G,.} or an integer
static if(isSomeString!T) {
if (number != "A" &&
number != "R" &&
number != "G" &&
number != ".") {
// not a special ; check if integer
try {
number.to!int; // don't need to store result, will use format/%s
}
catch (ConvException e) {
hts_log_error(__FUNCTION__, "Number not A/R/G/. nor an integer");
return;
}
}
}

// check Type
if (type != "Integer" &&
type != "Float" &&
type != "Flag" &&
type != "Character" &&
type != "String") {
hts_log_error(__FUNCTION__, "unrecognized type");
return;
}

// check Description
if (description == "") hts_log_error(__FUNCTION__, "no description");

// check Source and Version
if (source == "" && _version != "") hts_log_error(__FUNCTION__, "version wo source");

// Format params
if (source != "" && _version != "")
line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\",Source=\"%s\",Version=\"%s\">\0",
tagType, id, number, type, description, source, _version);
else if (source != "" && _version == "")
line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\",Source=\"%s\">\0",
tagType, id, number, type, description, source);
else
line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\">\0",
tagType, id, number, type, description);

bcf_hdr_append(this.hdr, line.ptr);
}

/** Add FILTER tag (§1.2.3) */
void addTag(string tagType)(string id, string description)
if(tagType == "FILTER")
{
// check ID
if (id == "") {
hts_log_error(__FUNCTION__, "no ID");
return;
}
// check Description
if (description == "") hts_log_error(__FUNCTION__, "no description");

string line = format("##FILTER=<ID=%s,Description=\"%s\">\0", id, description);
bcf_hdr_append(this.hdr, line.ptr);
}

/** Add FILTER tag (§1.2.3) */
deprecated void addFilterTag(string id, string description)
{
string filter = format("##FILTER=<ID=%s,Description=\"%s\">\0", id, description);
bcf_hdr_append(this.hdr, filter.ptr);
}

/** Add contig definition (§1.2.7) to header meta-info
other: "url=...,md5=...,etc."
*/
auto addTag(string tagType)(const(char)[] id, const int length = 0, string other = "")
if(tagType == "contig" || tagType == "CONTIG")
{
const(char)[] contig = "##contig=<ID=" ~ id ~
(length > 0 ? ",length=" ~ length.to!string : "") ~
(other != "" ? "," ~ other : "") ~
">\0";

return bcf_hdr_append(this.hdr, contig.ptr);
}
}

///
debug(dhtslib_unittest)
unittest
{
import std.exception: assertThrown;
import std.stdio: writeln, writefln;

hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);


auto hdr = VCFHeader(bcf_hdr_init("w\0"c.ptr));

hdr.addHeaderLineRaw("##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">");
hdr.addHeaderLineKV("INFO", "<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">");
// ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
hdr.addTag!"INFO"("AF", "A", "Integer", "Number of Samples With Data");
hdr.addHeaderLineRaw("##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\",taxonomy=x>"); // @suppress(dscanner.style.long_line)
hdr.addHeaderLineRaw("##FILTER=<ID=q10,Description=\"Quality below 10\">");

// Exercise header
assert(hdr.nsamples == 0);
hdr.addSample("NA12878");
assert(hdr.nsamples == 1);
}
Loading

0 comments on commit cb0282d

Please sign in to comment.