From 8b0121205e413feb5004bee8beb64469c7d696c1 Mon Sep 17 00:00:00 2001 From: vuule Date: Mon, 15 Mar 2021 18:04:34 -0700 Subject: [PATCH] fix stream size for string columns; make stream length 64bit --- cpp/src/io/orc/reader_impl.cu | 2 +- cpp/src/io/orc/writer_impl.cu | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 80935e3fbd5..61adef26dab 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -139,7 +139,7 @@ struct orc_stream_info { } uint64_t offset; // offset in file size_t dst_pos; // offset in memory relative to start of compressed stripe data - uint32_t length; // length in file + size_t length; // length in file uint32_t gdf_idx; // column index uint32_t stripe_idx; // stripe index }; diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 1c99c99369b..eb5e90bbeec 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -476,7 +476,6 @@ orc_streams writer::impl::create_streams(host_span columns, break; case TypeKind::STRING: { bool enable_dict = enable_dictionary_; - size_t direct_data_size = 0; size_t dict_data_size = 0; size_t dict_strings = 0; size_t dict_lengths_div512 = 0; @@ -488,11 +487,15 @@ orc_streams writer::impl::create_streams(host_span columns, dict_lengths_div512 += (sd->num_strings + 0x1ff) >> 9; dict_data_size += sd->dict_char_count; } - direct_data_size += std::accumulate( - stripe.cbegin(), stripe.cend(), direct_data_size, [&](auto data_size, auto rg_idx) { - return data_size + column.host_dict_chunk(rg_idx)->string_char_count; - }); } + + auto const direct_data_size = + std::accumulate(stripe_bounds.front().cbegin(), + stripe_bounds.back().cend(), + size_t{0}, + [&](auto data_size, auto rg_idx) { + return data_size + column.host_dict_chunk(rg_idx)->string_char_count; + }); if (enable_dict) { uint32_t dict_bits = 0; for (dict_bits = 1; dict_bits < 32; dict_bits <<= 1) {