From 6898cd82f9ed074d3f94f0feb8b38fb4072f44ee Mon Sep 17 00:00:00 2001 From: vasudeva8 Date: Fri, 3 Mar 2023 11:55:32 +0000 Subject: [PATCH] samples of htslib/sam api usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit update§ sample code update updated sample programs removed unnecessary files updates Merge branch 'demo' of github.com:vasudeva8/htslib into demo Merge branch 'demo' of github.com:vasudeva8/htslib into demo Merge branch 'demo' of github.com:vasudeva8/htslib into demo rollback of unrelated changes readme and some updates corrections and warning removal --- samples/README.md | 38 +++++++++++++++++------------------ samples/index_multireg_read.c | 2 +- samples/index_reg_read.c | 1 - samples/mod_aux.c | 19 +++++++++--------- samples/mod_aux_ba.c | 4 +--- samples/mod_bam.c | 8 +++----- samples/modstate.c | 10 ++++----- samples/mpileup.c | 9 ++++----- samples/pileup.c | 7 +++---- samples/pileup_mod.c | 18 ++++++++--------- samples/read_fast.c | 1 - samples/read_header.c | 2 +- samples/write_fast.c | 2 +- 13 files changed, 55 insertions(+), 66 deletions(-) diff --git a/samples/README.md b/samples/README.md index 5ab4c3179..6bea014b8 100644 --- a/samples/README.md +++ b/samples/README.md @@ -21,12 +21,14 @@ The HTSLib is expected to be installed and library and header path are part of t Math, pthread, curl, lzma, z and bz2 libraries and header files are required other than HTSLib and C libraries. -In many cases, the alignment data are expecetd as sorted, compressed and indexed. +In many cases, the alignment data are expected as sorted, compressed and indexed. ```sh -gcc -g -o -I /libhts.a -lcrypto -lm -lpthread -lcurl -llzma -lz -lbz2 #on a linux machine +gcc -g -o -I /libhts.a -lcrypto -lm -lpthread -lcurl -llzma -lz -lbz2 #linking statically on a linux machine -gcc -g -o -I /libhts.a -lm -lpthread -lcurl -llzma -lz -lbz2 #on a mac machine +gcc -g -o -I /libhts.a -lm -lpthread -lcurl -llzma -lz -lbz2 #linking statically on a mac machine + +gcc -g -o -I -L -lhts -Wl,-rpath #dynamically linking ``` @@ -41,10 +43,10 @@ gcc -g -o -I /libhts.a -lm [Split2][Split2] This application showcases the output file format selection. It saves the read1 and read2 as separate files in given directory, both as compressed sam though the extensions are different. -[cram][cram] +[Cram][Cram] This application showcases the different way in which cram reference data is used for cram output creation. -[read_fast][read_fast] +[Read_fast][Read_fast] This application showcases the fasta/fastq data read. [Read_header][Read_header] @@ -80,7 +82,7 @@ gcc -g -o -I /libhts.a -lm [Mod_aux_ba][Mod_aux_ba] This application showcases the update of auxiliary array data in alignment. It adds count of ATCGN base as an array in auxiliary data, BA:I. Modified data is written on standard output. -[write_fast][write_fast] +[Write_fast][Write_fast] This application showcases the fasta/fastq data write. It appends a dummy data to given file. [Index][Index] @@ -92,16 +94,16 @@ gcc -g -o -I /libhts.a -lm [Read_multireg][Read_multireg]: This application showcases the usage of mulitple region specification in alignment read. -[pileup][pileup]: +[Pileup][Pileup]: This application showcases the pileup api, where all alignments covering a reference position are accessed together. It displays the bases covering each position on standard output. -[mpileup][mpileup]: +[Mpileup][Mpileup]: This application showcases the mpileup api, which supports multiple input files for pileup and gives a side by side view of them in pileup format. It displays the bases covering each position on standard output. -[modstate][modstate]: +[Modstate][Modstate]: This application showcases the access of base modifications in alignment. It shows the modifications present in an alignment and accesses them using available APIs. There are 2 APIs and which one to be used can be selected through input. -[pileupmod][pileupmod]: +[Pileupmod][Pileupmod]: This application showcases the base modification access in pileup mode. It shows the pileup display with base modifications. [Flags_field][Flags_field] @@ -117,8 +119,8 @@ gcc -g -o -I /libhts.a -lm [Flags]: flags_demo.c [Split]: split.c [Split2]: split2.c -[cram]: cram.c -[read_fast]: read_fast.c +[Cram]: cram.c +[Read_fast]: read_fast.c [Read_header]: read_header.c [Read_ref]: read_refname.c [Read_bam]: read_bam.c @@ -130,16 +132,14 @@ gcc -g -o -I /libhts.a -lm [Mod_bam]: mod_bam.c [Mod_aux]: mod_aux.c [Mod_aux_ba]: mod_aux_ba.c -[write_fast]: write_fast.c +[Write_fast]: write_fast.c [Index]: index_write.c [Read_reg]: index_reg_read.c [Read_multireg]: index_multireg_read.c -[pileup]: pileup.c -[mpileup]: mpileup.c -[modstate]: modstate.c -[pileupmod]: pileup_mod.c - - +[Pileup]: pileup.c +[Mpileup]: mpileup.c +[Modstate]: modstate.c +[Pileupmod]: pileup_mod.c [Flags_field]: flags_htsopt_field.c [Split_t1]: split_thread1.c [Split_t2]: split_thread2.c diff --git a/samples/index_multireg_read.c b/samples/index_multireg_read.c index af9312e43..a62da0b27 100644 --- a/samples/index_multireg_read.c +++ b/samples/index_multireg_read.c @@ -51,7 +51,7 @@ int main(int argc, char *argv[]) { const char *inname = NULL; char *ptr = NULL; - int c = 0, ret = EXIT_FAILURE, size = 0; + int c = 0, ret = EXIT_FAILURE; samFile *infile = NULL, *outfile = NULL; sam_hdr_t *in_samhdr = NULL; bam1_t *bamdata = NULL; diff --git a/samples/index_reg_read.c b/samples/index_reg_read.c index 8c75d54b3..346d5428f 100644 --- a/samples/index_reg_read.c +++ b/samples/index_reg_read.c @@ -60,7 +60,6 @@ int main(int argc, char *argv[]) samFile *infile = NULL, *outfile = NULL; sam_hdr_t *in_samhdr = NULL; bam1_t *bamdata = NULL; - size_t cnt = 0; hts_idx_t *idx = NULL; hts_itr_t *iter = NULL; diff --git a/samples/mod_aux.c b/samples/mod_aux.c index fcbf919a6..d5ed18cde 100644 --- a/samples/mod_aux.c +++ b/samples/mod_aux.c @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) { const char *inname = NULL, *tag = NULL, *qname = NULL, *val = NULL; char type = '\0'; - int c = 0, ret = EXIT_FAILURE, ret_r = 0, length = 0; + int ret = EXIT_FAILURE, ret_r = 0, length = 0; sam_hdr_t *in_samhdr = NULL; samFile *infile = NULL, *outfile = NULL; bam1_t *bamdata = NULL; @@ -67,9 +67,6 @@ int main(int argc, char *argv[]) type = argv[4][0]; val = argv[5]; - if (type >= 'a' && type <='z') { - type -= 32; //make it upper case - } if (!(bamdata = bam_init1())) { printf("Failed to allocate data memory!\n"); goto end; @@ -111,11 +108,12 @@ int main(int argc, char *argv[]) int i = 0; float f = 0; //tag not present append switch (type) { - case 'F': - case 'D': + case 'f': + case 'd': length = sizeof(float); f = atof(val); val = (const char*) &f; + type = 'f'; break; case 'C': case 'S': @@ -130,6 +128,10 @@ int main(int argc, char *argv[]) case 'A': length = 1; break; + default: + printf("Invalid type mentioned\n"); + goto end; + break; } if (bam_aux_append(bamdata, tag, type, length, (const uint8_t*)val)) { printf("Failed to append aux data, errno: %d\n", errno); @@ -140,8 +142,8 @@ int main(int argc, char *argv[]) char auxtype = bam_aux_type(data); //update the tag with newer value switch (type) { - case 'F': - case 'D': + case 'f': + case 'd': if (auxtype != 'f' && auxtype != 'd') { printf("Invalid aux type passed\n"); goto end; @@ -183,7 +185,6 @@ int main(int argc, char *argv[]) //update the char data directly on buffer *(data+1) = val[0]; break; - //TODO there is no H update support! default: printf("Invalid data type\n"); goto end; diff --git a/samples/mod_aux_ba.c b/samples/mod_aux_ba.c index 956ecd2e2..8ef90ee1e 100644 --- a/samples/mod_aux_ba.c +++ b/samples/mod_aux_ba.c @@ -49,13 +49,11 @@ returns 1 on failure 0 on success int main(int argc, char *argv[]) { const char *inname = NULL; - char type = '\0'; - int i = 0, ret = EXIT_FAILURE, ret_r = 0, length = 0; + int i = 0, ret = EXIT_FAILURE, ret_r = 0; uint32_t cnt[5] = {0}; //A C G T N sam_hdr_t *in_samhdr = NULL; samFile *infile = NULL, *outfile = NULL; bam1_t *bamdata = NULL; - uint8_t *data = NULL; //mod_aux infile if (argc != 2) { diff --git a/samples/mod_bam.c b/samples/mod_bam.c index 65654fa4c..9f1eb324e 100644 --- a/samples/mod_bam.c +++ b/samples/mod_bam.c @@ -48,15 +48,13 @@ returns 1 on failure 0 on success */ int main(int argc, char *argv[]) { - const char *inname = NULL, *tidname = NULL, *flags = NULL, *qname = NULL; + const char *inname = NULL, *qname = NULL; char *val = NULL; int c = 0, ret = EXIT_FAILURE, field = 0; sam_hdr_t *in_samhdr = NULL; samFile *infile = NULL, *outfile = NULL; int ret_r = 0, i = 0; bam1_t *bamdata = NULL; - uint8_t *data = NULL; - uint32_t *cigar = NULL; //mod_bam infile QNAME fieldpos newval if (argc != 5) { @@ -143,7 +141,7 @@ int main(int argc, char *argv[]) break; } if (bam_set1(newbam, bamdata->core.l_qname, bam_get_qname(bamdata), bamdata->core.flag, bamdata->core.tid, bamdata->core.pos, bamdata->core.qual, - ncigar, cigar, bamdata->core.mtid, bamdata->core.mpos, bamdata->core.isize, bamdata->core.l_qseq, bam_get_seq(bamdata), bam_get_qual(bamdata), bam_get_l_aux(bamdata)) < 0) { + ncigar, cigar, bamdata->core.mtid, bamdata->core.mpos, bamdata->core.isize, bamdata->core.l_qseq, (const char*)bam_get_seq(bamdata), (const char*)bam_get_qual(bamdata), bam_get_l_aux(bamdata)) < 0) { printf("Failed to set bamdata\n"); ret = -1; break; @@ -173,7 +171,7 @@ int main(int argc, char *argv[]) break; } for( c = 0; c < i; ++c) { - bam_set_seqi(bam_get_seq(bamdata), c, seq_nt16_table[val[c]]); + bam_set_seqi(bam_get_seq(bamdata), c, seq_nt16_table[(unsigned char)val[c]]); } break; case 11:// QUAL diff --git a/samples/modstate.c b/samples/modstate.c index d21346503..ef82903b6 100644 --- a/samples/modstate.c +++ b/samples/modstate.c @@ -48,7 +48,7 @@ returns 1 on failure 0 on success */ int main(int argc, char *argv[]) { - const char *inname = NULL, *tidname = NULL, *flags = NULL; + const char *inname = NULL; int ret = EXIT_FAILURE; sam_hdr_t *in_samhdr = NULL; samFile *infile = NULL; @@ -56,7 +56,6 @@ int main(int argc, char *argv[]) int ret_r = 0, i = 0 , r = 0, j = 0, pos = 0, opt = 0, k = 0, cnt = 0, *bm = NULL; bam1_t *bamdata = NULL; uint8_t *data = NULL; - uint32_t *cigar = NULL; hts_base_mod_state *ms = NULL; @@ -107,8 +106,7 @@ int main(int argc, char *argv[]) hts_base_mod mod[5] = {0}; //for ATCGN if (opt) { //option 1 - for (i; i < bamdata->core.l_qseq; ++i) { - int cnt = 0; + for (; i < bamdata->core.l_qseq; ++i) { if ((r = bam_mods_at_next_pos(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]))) <= -1) { printf("Failed to get modifications\n"); goto end; @@ -130,7 +128,7 @@ int main(int argc, char *argv[]) else { //option 2 while ((r = bam_next_basemod(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]), &pos)) >= 0) { - for (i; i < bamdata->core.l_qseq && i < pos; ++i) { + for (; i < bamdata->core.l_qseq && i < pos; ++i) { printf("%c", seq_nt16_str[bam_seqi(data, i)]); } //modifications @@ -140,7 +138,7 @@ int main(int argc, char *argv[]) if (i == pos) i++; //skip the modification already displayed if (!r) { - for (i; i < bamdata->core.l_qseq; ++i) { + for (; i < bamdata->core.l_qseq; ++i) { printf("%c", seq_nt16_str[bam_seqi(data, i)]); } break; diff --git a/samples/mpileup.c b/samples/mpileup.c index db75422fb..fe933748e 100644 --- a/samples/mpileup.c +++ b/samples/mpileup.c @@ -67,7 +67,6 @@ int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { /// @return same as sam_read1 int readdata(void *data, bam1_t *b) { - int ret = -1; plpconf *conf = (plpconf*)data; if (!conf || !conf->infile) { return -2; //cant read data @@ -88,9 +87,9 @@ int main(int argc, char *argv[]) bam1_t *bamdata = NULL; plpconf** conf = NULL; bam_mplp_t mplpiter = NULL; - int tid = -1, input = 0, k = 0, dpt = 0, inslen = 0, dellen = 0, *depth = NULL; + int tid = -1, input = 0, k = 0, dpt = 0, *depth = NULL; hts_pos_t refpos = -1; - bam_pileup1_t **plp = NULL; + const bam_pileup1_t **plp = NULL; //infile ... if (argc < 2) { @@ -130,7 +129,7 @@ int main(int argc, char *argv[]) } } - if (!(mplpiter = bam_mplp_init(argc - 1, readdata, conf))) { + if (!(mplpiter = bam_mplp_init(argc - 1, readdata, (void**) conf))) { printf("Failed to initialize mpileup data\n"); goto end; } @@ -140,7 +139,7 @@ int main(int argc, char *argv[]) bam_mplp_destructor(mplpiter, plpdestructor); while (bam_mplp64_auto(mplpiter, &tid, &refpos, depth, plp) > 0) { - printf("%d\t%d\t", tid+1, refpos+1); + printf("%d\t%"PRIhts_pos"\t", tid+1, refpos+1); for (input = 0; input < argc - 1; ++input) { for (dpt = 0; dpt < depth[input]; ++dpt) { diff --git a/samples/pileup.c b/samples/pileup.c index 74134cf95..11e2fb02f 100644 --- a/samples/pileup.c +++ b/samples/pileup.c @@ -75,7 +75,6 @@ int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { /// @return same as sam_read1 int readdata(void *data, bam1_t *b) { - int ret = -1; plpconf *conf = (plpconf*)data; if (!conf || !conf->infile) { return -2; //cant read data @@ -96,9 +95,9 @@ int main(int argc, char *argv[]) bam1_t *bamdata = NULL; plpconf conf = {0}; bam_plp_t plpiter = NULL; - int tid = -1, n = -1, j = 0, k = 0, inslen = 0, dellen = 0; - hts_pos_t refpos = -1; - bam_pileup1_t *plp = NULL; + int tid = -1, n = -1, j = 0, k = 0; + int refpos = -1; + const bam_pileup1_t *plp = NULL; //infile if (argc != 2) { diff --git a/samples/pileup_mod.c b/samples/pileup_mod.c index d4e28fc39..d2ef03abf 100644 --- a/samples/pileup_mod.c +++ b/samples/pileup_mod.c @@ -54,10 +54,9 @@ typedef struct plpconf { /// @param cd client data /// @return int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - plpconf *conf= (plpconf*)data; - //can use this to access anything required from the data in pileup init - //when using cd, initialize and use as it will be reused after destructor + //plpconf *conf= (plpconf*)data; can use this to access anything required from the data in pileup init + //when using cd, initialize and use as it will be reused after destructor cd->p = hts_base_mod_state_alloc(); if (!cd->p) { printf("Failed to allocate base modification state\n"); @@ -72,7 +71,7 @@ int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { if (cd->p) { hts_base_mod_state_free((hts_base_mod_state *)cd->p); - cd->p = NULL; + cd->p = NULL; } return 0; } @@ -83,7 +82,6 @@ int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { /// @return same as sam_read1 int readdata(void *data, bam1_t *b) { - int ret = -1; plpconf *conf = (plpconf*)data; if (!conf || !conf->infile) { return -2; //cant read data @@ -105,10 +103,10 @@ int main(int argc, char *argv[]) plpconf conf = {0}; bam_plp_t plpiter = NULL; int tid = -1, depth = -1, j = 0, k = 0, inslen = 0, dellen = 0, modlen = 0; - const int nmods = 5; - hts_base_mod mods[nmods] = {0}; //ACGT N - hts_pos_t refpos = -1; - bam_pileup1_t *plp = NULL; + #define NMODS 5 + hts_base_mod mods[NMODS] = {0}; //ACGT N + int refpos = -1; + const bam_pileup1_t *plp = NULL; kstring_t insdata = KS_INITIALIZE; //infile @@ -156,7 +154,7 @@ int main(int argc, char *argv[]) } /*invoke bam mods_mods_at_qpos before bam_plp_insertion_mod that the base modification is retrieved before change in pileup pos thr' plp_insertion_mod call*/ - if ((modlen = bam_mods_at_qpos(plp[j].b, plp[j].qpos, plp[j].cd.p, mods, nmods)) == -1) { + if ((modlen = bam_mods_at_qpos(plp[j].b, plp[j].qpos, plp[j].cd.p, mods, NMODS)) == -1) { printf("Failed to get modifications\n"); goto end; } diff --git a/samples/read_fast.c b/samples/read_fast.c index 5d3105b2c..a06c3eed6 100644 --- a/samples/read_fast.c +++ b/samples/read_fast.c @@ -50,7 +50,6 @@ int main(int argc, char *argv[]) { const char *inname = NULL; //input file name int c = 0, ret = EXIT_FAILURE; - int64_t cntread1 = 0, cntread2 = 0; //count samFile *infile = NULL; //sam file sam_hdr_t *in_samhdr = NULL; //header of file bam1_t *bamdata = NULL; //to hold the read data diff --git a/samples/read_header.c b/samples/read_header.c index 25a3ce148..eb14daea5 100644 --- a/samples/read_header.c +++ b/samples/read_header.c @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) { const char *inname = NULL, *header = NULL, *tag = NULL, *idval = NULL; char *id = NULL; - int c = 0, ret = EXIT_FAILURE, pos = -1, linecnt = 0; + int c = 0, ret = EXIT_FAILURE, linecnt = 0; samFile *infile = NULL; sam_hdr_t *in_samhdr = NULL; kstring_t data = KS_INITIALIZE; diff --git a/samples/write_fast.c b/samples/write_fast.c index 22e29b6d9..ef7817683 100644 --- a/samples/write_fast.c +++ b/samples/write_fast.c @@ -49,7 +49,7 @@ returns 1 on failure 0 on success int main(int argc, char *argv[]) { const char *outname = NULL; //output file name - int c = 0, ret = EXIT_FAILURE; + int ret = EXIT_FAILURE; samFile *outfile = NULL; //sam file sam_hdr_t *out_samhdr = NULL; //header of file bam1_t *bamdata = NULL; //to hold the read data