Skip to content

Commit

Permalink
Add sam_hdr_passes_filter() for SAM header filter expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
jmarshall committed Nov 2, 2021
1 parent a9d1c53 commit daf9502
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ hts-object-files: $(LIBHTS_OBJS)
bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(htslib_khash_h)
errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h)
kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h)
header.o header.pico: header.c config.h $(textutils_internal_h) $(header_h)
header.o header.pico: header.c config.h $(htslib_hts_expr_h) $(textutils_internal_h) $(header_h)
hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(htslib_kstring_h) $(hts_internal_h) $(htslib_khash_h)
hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(hfile_internal_h)
hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h)
Expand Down
94 changes: 94 additions & 0 deletions header.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <string.h>
#include <assert.h>
#include <errno.h>
#include "htslib/hts_expr.h"
#include "textutils_internal.h"
#include "header.h"

Expand Down Expand Up @@ -898,6 +899,84 @@ static int sam_hrecs_parse_lines(sam_hrecs_t *hrecs, const char *hdr, size_t len
return 0;
}

static int sam_hrecs_tag_lookup(sam_hrec_type_t *h_type, const char *tagname,
char type, hts_expr_val_t *result) {
sam_hrec_tag_t *tag = sam_hrecs_find_key(h_type, tagname, NULL);
if (tag == NULL || tag->len < 3) {
result->is_str = (type == 'Z');
result->s.l = 0;
result->d = 0.0;
result->is_true = 0;
return 0;
}

result->is_true = 1;
switch (type) {
case 'f':
result->is_str = 0;
result->d = strtod(&tag->str[3], NULL);
return 0;

case 'i':
result->is_str = 0;
result->d = strtoll(&tag->str[3], NULL, 0);
return 0;

case 'Z':
result->is_str = 1;
ks_clear(&result->s);
return (kputsn(&tag->str[3], tag->len-3, &result->s) >= 0)? 0 : -1;
}

return -1;
}

static int sam_hrecs_sym_lookup(void *data, char *str, char **end,
hts_expr_val_t *result) {
sam_hrec_type_t *h_type = (sam_hrec_type_t *) data;

switch (*str) {
case 't':
if (memcmp(str, "type", 4) == 0) {
*end = &str[4];
result->is_str = 1;
ks_clear(&result->s);
kputc_(h_type->type >> 8, &result->s);
kputc(h_type->type & 0xff, &result->s);
return (ks_len(&result->s) == 2)? 0 : -1;
}
break;

case '@':
if (isalpha_c(str[1]) && isalpha_c(str[2])) {
*end = &str[3];
result->is_str = 0;
result->is_true = result->d = (h_type->type == TYPEKEY(&str[1]));
return 0;
}
break;

case '[':
if (str[1] & str[2]) {
if (str[3] == ']') {
*end = &str[4];
return sam_hrecs_tag_lookup(h_type, &str[1], 'Z', result);
}
else if (str[3] == ':' && str[4] && str[5] == ']') {
if (!strchr("fiZ", str[4])) {
hts_log_error("Unrecognised %.6s type code", str);
return -1;
}
*end = &str[6];
return sam_hrecs_tag_lookup(h_type, &str[1], str[4], result);
}
}
break;
}

return -1;
}

/*! Update sam_hdr_t target_name and target_len arrays
*
* @return 0 on success; -1 on failure
Expand Down Expand Up @@ -1762,6 +1841,21 @@ int sam_hdr_remove_lines(sam_hdr_t *bh, const char *type, const char *id, void *
return ret;
}

int sam_hdr_passes_filter(sam_hdr_t *bh, sam_hdr_line_itr_t *iter, hts_filter_t *filt) {
hts_expr_val_t result;
int ret;
if (hts_filter_eval(filt, iter, sam_hrecs_sym_lookup, &result) >= 0) {
ret = result.is_true;
}
else {
hts_log_error("Couldn't process sam_hdr filter expression");
ret = -1;
}

hts_expr_val_free(&result);
return ret;
}

int sam_hdr_count_lines(sam_hdr_t *bh, const char *type) {
int count;
sam_hrec_type_t *first_ty, *itr_ty;
Expand Down
13 changes: 10 additions & 3 deletions htslib/sam.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ DEALINGS IN THE SOFTWARE. */
extern "C" {
#endif

struct hts_filter_t;
struct sam_hrec_type_s;

/// Highest SAM format version supported by this library
Expand Down Expand Up @@ -845,6 +846,15 @@ hts_pos_t sam_hdr_tid2len(const sam_hdr_t *h, int tid);
*/
static inline int bam_name2id(sam_hdr_t *h, const char *ref) { return sam_hdr_name2tid(h, ref); }

/// Check whether a header line passes an hts_filter
/** @param h Pointer to the header structure previously read
* @param iter Iterator pointing to a header line
* @param filt Pointer to the filter, created from hts_filter_init
* @return 1 if it passes, 0 if not, and <0 on error
*/
HTSLIB_EXPORT
int sam_hdr_passes_filter(sam_hdr_t *h, sam_hdr_line_itr_t *iter, struct hts_filter_t *filt);

/// Generate a unique \@PG ID: value
/*!
* @param name Name of the program. Eg. samtools
Expand Down Expand Up @@ -1483,9 +1493,6 @@ const char *sam_parse_region(sam_hdr_t *h, const char *s, int *tid,
HTSLIB_EXPORT
int sam_write1(samFile *fp, const sam_hdr_t *h, const bam1_t *b) HTS_RESULT_USED;

// Forward declaration, see hts_expr.h for full.
struct hts_filter_t;

/// sam_passes_filter - Checks whether a record passes an hts_filter.
/** @param h Pointer to the header structure previously read
* @param b Pointer to the BAM record to be checked
Expand Down

0 comments on commit daf9502

Please sign in to comment.