diff --git a/c_glib/test/test-orc-file-reader.rb b/c_glib/test/test-orc-file-reader.rb index 38900cf12f3d0..6626c67c3abe4 100644 --- a/c_glib/test/test-orc-file-reader.rb +++ b/c_glib/test/test-orc-file-reader.rb @@ -185,8 +185,8 @@ def all_columns test("select fields") do require_gi_bindings(3, 2, 6) @reader.field_indices = [1, 3] - assert_equal(build_table("boolean1" => build_boolean_array([false, true]), - "short1" => build_int16_array([1024, 2048])), + assert_equal(build_table("byte1" => build_int8_array([1, 100]), + "int1" => build_int32_array([65536, 65536])), @reader.read_stripes) end end @@ -200,10 +200,8 @@ def all_columns test("select fields") do require_gi_bindings(3, 2, 6) @reader.field_indices = [1, 3] - boolean1 = build_boolean_array([false, true]) - short1 = build_int16_array([1024, 2048]) - assert_equal(build_record_batch("boolean1" => boolean1, - "short1" => short1), + assert_equal(build_record_batch("byte1" => build_int8_array([1, 100]), + "int1" => build_int32_array([65536, 65536])), @reader.read_stripe(0)) end end diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index 5af5ebccc8470..18f88bc6dfb91 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -411,7 +411,7 @@ class ORCFileReader::Impl { ARROW_RETURN_IF(*it < 0, Status::Invalid("Negative field index")); include_indices_list.push_back(*it); } - opts->includeTypes(include_indices_list); + opts->include(include_indices_list); return Status::OK(); } diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc index 5c234cc97c4a0..afc4bdb1d3b24 100644 --- a/cpp/src/arrow/adapters/orc/adapter_test.cc +++ b/cpp/src/arrow/adapters/orc/adapter_test.cc @@ -226,7 +226,8 @@ std::shared_ptr GenerateRandomTable(const std::shared_ptr& schema void AssertTableWriteReadEqual(const std::shared_ptr
& input_table, const std::shared_ptr
& expected_output_table, - const int64_t max_size = kDefaultSmallMemStreamSize) { + const int64_t max_size = kDefaultSmallMemStreamSize, + std::vector* opt_selected_read_indices = nullptr) { EXPECT_OK_AND_ASSIGN(auto buffer_output_stream, io::BufferOutputStream::Create(max_size)); auto write_options = adapters::orc::WriteOptions(); @@ -250,7 +251,11 @@ void AssertTableWriteReadEqual(const std::shared_ptr
& input_table, ASSERT_EQ(reader->GetCompression(), write_options.compression); ASSERT_EQ(reader->GetCompressionSize(), write_options.compression_block_size); ASSERT_EQ(reader->GetRowIndexStride(), write_options.row_index_stride); - EXPECT_OK_AND_ASSIGN(auto actual_output_table, reader->Read()); + EXPECT_OK_AND_ASSIGN(auto actual_output_table, + opt_selected_read_indices == nullptr + ? reader->Read() + : reader->Read(*opt_selected_read_indices)); + ASSERT_OK(actual_output_table->ValidateFull()); AssertTablesEqual(*expected_output_table, *actual_output_table, false, false); } @@ -451,6 +456,37 @@ TEST_F(TestORCWriterTrivialNoConversion, writeChunkless) { std::shared_ptr
table = TableFromJSON(table_schema, {}); AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize / 16); } +TEST_F(TestORCWriterTrivialNoConversion, writeTrivialChunkAndSelectField) { + std::shared_ptr
table = TableFromJSON(table_schema, {R"([])"}); + std::shared_ptr schema_selected = + schema({field("int8", int8()), field("int32", int32())}); + std::shared_ptr
table_selected = TableFromJSON(schema_selected, {R"([])"}); + std::vector selected_indices = {1, 3}; + AssertTableWriteReadEqual(table, table_selected, kDefaultSmallMemStreamSize / 16, + &selected_indices); +} +TEST_F(TestORCWriterTrivialNoConversion, writeFilledChunkAndSelectField) { + std::vector selected_indices = {1, 7}; + random::RandomArrayGenerator rand(kRandomSeed); + std::shared_ptr local_schema = schema({ + field("bool", boolean()), + field("int32", int32()), + field("int64", int64()), + field("float", float32()), + field("struct", struct_({field("a", utf8()), field("b", int64())})), + field("double", float64()), + field("date32", date32()), + field("ts3", timestamp(TimeUnit::NANO)), + field("string", utf8()), + field("binary", binary()), + }); + auto batch = rand.BatchOf(local_schema->fields(), 100); + std::shared_ptr
table = Table::Make(local_schema, batch->columns()); + EXPECT_OK_AND_ASSIGN(auto table_selected, table->SelectColumns(selected_indices)); + AssertTableWriteReadEqual(table, table_selected, kDefaultSmallMemStreamSize, + &selected_indices); +} + class TestORCWriterTrivialWithConversion : public ::testing::Test { public: TestORCWriterTrivialWithConversion() { diff --git a/ruby/red-arrow/test/test-orc.rb b/ruby/red-arrow/test/test-orc.rb index b882da0a1b5e3..4670350a09dbc 100644 --- a/ruby/red-arrow/test/test-orc.rb +++ b/ruby/red-arrow/test/test-orc.rb @@ -164,8 +164,8 @@ def pp_values(values) ] end assert_equal([ - ["boolean1: bool", [pp_values([false, true])]], - ["short1: int16", [pp_values([1024, 2048])]], + ["byte1: int8", [pp_values([1, 100])]], + ["int1: int32", [pp_values([65536, 65536])]], ], dump) end