diff --git a/cram/cram_decode.c b/cram/cram_decode.c index 39869cbdd..47b7ed076 100644 --- a/cram/cram_decode.c +++ b/cram/cram_decode.c @@ -2059,6 +2059,13 @@ static int cram_decode_aux(cram_fd *fd, *has_NM = 1; } } + + // We could go to 2^32 fine, but we shouldn't be hitting this anyway, + // and it's protecting against memory hogs too. + if (BLOCK_SIZE(s->aux_blk) > (1u<<31)) { + hts_log_error("CRAM->BAM aux block size overflow"); + goto block_err; + } } return r; diff --git a/cram/cram_encode.c b/cram/cram_encode.c index 5b56aedd5..9797fa7a8 100644 --- a/cram/cram_encode.c +++ b/cram/cram_encode.c @@ -3852,7 +3852,7 @@ int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b) { if (!c->slice || c->curr_rec == c->max_rec || (bam_ref(b) != c->curr_ref && c->curr_ref >= -1) || - (c->s_num_bases >= fd->bases_per_slice)) { + (c->s_num_bases + c->s_aux_bytes >= fd->bases_per_slice)) { int slice_rec, curr_rec, multi_seq = fd->multi_seq == 1; int curr_ref = c->slice ? c->curr_ref : bam_ref(b); @@ -3885,7 +3885,7 @@ int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b) { if (CRAM_MAJOR_VERS(fd->version) == 1 || c->curr_rec == c->max_rec || fd->multi_seq != 1 || !c->slice || - c->s_num_bases >= fd->bases_per_slice) { + c->s_num_bases + c->s_aux_bytes >= fd->bases_per_slice) { if (NULL == (c = cram_next_container(fd, b))) { if (fd->ctr) { // prevent cram_close attempting to flush @@ -3997,6 +3997,7 @@ int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b) { c->curr_rec++; c->curr_c_rec++; c->s_num_bases += bam_seq_len(b); + c->s_aux_bytes += bam_get_l_aux(b); c->n_mapped += (bam_flag(b) & BAM_FUNMAP) ? 0 : 1; fd->record_counter++; diff --git a/cram/cram_structs.h b/cram/cram_structs.h index 0a66d51b9..15b7f145b 100644 --- a/cram/cram_structs.h +++ b/cram/cram_structs.h @@ -473,6 +473,7 @@ struct cram_container { uint32_t crc32; // CRC32 uint64_t s_num_bases; // number of bases in this slice + uint64_t s_aux_bytes; // number of bytes of aux in BAM uint32_t n_mapped; // Number of mapped reads int ref_free; // whether 'ref' is owned by us and must be freed. @@ -500,8 +501,8 @@ typedef struct cram_record { // Auxiliary data int32_t ntags; // TC - int32_t aux; // idx to s->aux_blk - int32_t aux_size; // total size of packed ntags in aux_blk + uint32_t aux; // idx to s->aux_blk + uint32_t aux_size; // total size of packed ntags in aux_blk #ifndef TN_external int32_t TN_idx; // TN; idx to s->TN; #else @@ -509,15 +510,15 @@ typedef struct cram_record { #endif int TL; - int32_t seq; // idx to s->seqs_blk - int32_t qual; // idx to s->qual_blk - int32_t cigar; // idx to s->cigar + uint32_t seq; // idx to s->seqs_blk + uint32_t qual; // idx to s->qual_blk + uint32_t cigar; // idx to s->cigar int32_t ncigar; int64_t aend; // alignment end int32_t mqual; // MQ - int32_t feature; // idx to s->feature - int32_t nfeature; // number of features + uint32_t feature; // idx to s->feature + uint32_t nfeature; // number of features int32_t mate_flags; // MF } cram_record; @@ -623,8 +624,8 @@ struct cram_slice { uint32_t ncigar; cram_feature *features; - int nfeatures; - int afeatures; // allocated size of features + uint32_t nfeatures; + uint32_t afeatures; // allocated size of features #ifndef TN_external // TN field (Tag Name)