Skip to content

Commit

Permalink
update tests to test dictionary read of string columns
Browse files Browse the repository at this point in the history
  • Loading branch information
bkietz committed Feb 23, 2020
1 parent 22caaa1 commit 7546851
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 8 deletions.
8 changes: 6 additions & 2 deletions cpp/src/arrow/dataset/file_parquet_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ TEST_F(TestParquetFileFormat, ScanRecordBatchReader) {
}

TEST_F(TestParquetFileFormat, ScanRecordBatchReaderDictEncoded) {
schema_ = schema({field("utf8", utf8())});

auto reader = GetRecordBatchReader();
auto source = GetFileSource(reader.get());

Expand All @@ -198,7 +200,7 @@ TEST_F(TestParquetFileFormat, ScanRecordBatchReaderDictEncoded) {
ASSERT_OK_AND_ASSIGN(auto scan_task_it, fragment->Scan(ctx_));
int64_t row_count = 0;

Schema expected_schema({field("f64", dictionary(int32(), float64()))});
Schema expected_schema({field("utf8", dictionary(int32(), utf8()))});

for (auto maybe_task : scan_task_it) {
ASSERT_OK_AND_ASSIGN(auto task, std::move(maybe_task));
Expand Down Expand Up @@ -309,13 +311,15 @@ TEST_F(TestParquetFileFormat, Inspect) {
}

// Checks the schema reported by format_->Inspect() after column index 0 has been
// added to format_->read_dict_indices: the result must compare equal to a schema
// whose single field has a dictionary(int32(), ...) type.
TEST_F(TestParquetFileFormat, InspectDictEncoded) {
// Single utf8 column; presumably picked up by GetRecordBatchReader() below
// when it generates the data -- TODO confirm against the fixture.
schema_ = schema({field("utf8", utf8())});

auto reader = GetRecordBatchReader();
auto source = GetFileSource(reader.get());

// Mark column index 0 before inspecting the source.
format_->read_dict_indices.insert(0);
ASSERT_OK_AND_ASSIGN(auto actual, format_->Inspect(*source.get()));

// NOTE(review): two declarations of `expected_schema` are shown here (an "f64"
// one and a "utf8" one). This looks like the removed and added line of a
// rendered diff; only the utf8 variant matches the schema_ set above -- confirm
// that a single declaration exists in the real file.
Schema expected_schema({field("f64", dictionary(int32(), float64()))});
Schema expected_schema({field("utf8", dictionary(int32(), utf8()))});
EXPECT_EQ(*actual, expected_schema);
}

Expand Down
5 changes: 4 additions & 1 deletion cpp/src/arrow/status.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,12 @@ std::string Status::CodeAsString() const {
if (state_ == nullptr) {
return "OK";
}
return CodeAsString(code());
}

std::string Status::CodeAsString(StatusCode code) {
const char* type;
switch (code()) {
switch (code) {
case StatusCode::OK:
type = "OK";
break;
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ class ARROW_EXPORT Status : public util::EqualityComparable<Status>,
/// \brief Return a string representation of the status code, without the message
/// text or POSIX code information.
std::string CodeAsString() const;
/// \brief Return a string representation of the given status code.
///
/// Static overload: usable without constructing a Status instance.
static std::string CodeAsString(StatusCode);

/// \brief Return the StatusCode value attached to this status.
StatusCode code() const { return ok() ? StatusCode::OK : state_->code; }
Expand Down
9 changes: 7 additions & 2 deletions cpp/src/arrow/testing/generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ namespace arrow {

// Helper that produces an Array of ArrowType via BuilderType. It looks up the
// type singleton for ArrowType, defines a callback that appends one element to
// the builder, and hands type, size and callback to ArrayFromBuilderVisitor.
// Presumably the callback is invoked `size` times -- TODO confirm against
// ArrayFromBuilderVisitor. The result is unwrapped with ValueOrDie(), so no
// error status is returned to the caller.
//
// NOTE(review): the signature line and the builder_fn line each appear twice
// below (one variant ignores `value` and appends CType(0), the other appends
// `value` captured by reference). This looks like the removed and added lines
// of a rendered diff -- confirm which pair is live in the real file.
template <typename ArrowType, typename CType = typename TypeTraits<ArrowType>::CType,
typename BuilderType = typename TypeTraits<ArrowType>::BuilderType>
static inline std::shared_ptr<Array> ConstantArray(int64_t size, CType value = 0) {
static inline std::shared_ptr<Array> ConstantArray(int64_t size, CType value) {
auto type = TypeTraits<ArrowType>::type_singleton();
auto builder_fn = [](BuilderType* builder) { builder->UnsafeAppend(CType(0)); };
auto builder_fn = [&](BuilderType* builder) { builder->UnsafeAppend(value); };
return ArrayFromBuilderVisitor(type, size, builder_fn).ValueOrDie();
}

Expand Down Expand Up @@ -90,4 +90,9 @@ std::shared_ptr<arrow::Array> ConstantArrayGenerator::Float64(int64_t size,
return ConstantArray<DoubleType>(size, value);
}

// Produces the constant string array by delegating to the generic
// ConstantArray helper instantiated for StringType.
std::shared_ptr<arrow::Array> ConstantArrayGenerator::String(int64_t size,
                                                             std::string value) {
  auto repeated = ConstantArray<StringType>(size, value);
  return repeated;
}

} // namespace arrow
11 changes: 11 additions & 0 deletions cpp/src/arrow/testing/generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "arrow/record_batch.h"
Expand Down Expand Up @@ -118,6 +119,14 @@ class ARROW_EXPORT ConstantArrayGenerator {
/// \return a generated Array
static std::shared_ptr<arrow::Array> Float64(int64_t size, double value = 0);

/// \brief Generates a constant StringArray
///
/// \param[in] size the size of the array to generate
/// \param[in] value the string to repeat; defaults to the empty string
///
/// \return a generated Array
static std::shared_ptr<arrow::Array> String(int64_t size, std::string value = "");

template <typename ArrowType, typename CType = typename ArrowType::c_type>
static std::shared_ptr<arrow::Array> Numeric(int64_t size, CType value = 0) {
switch (ArrowType::type_id) {
Expand Down Expand Up @@ -179,6 +188,8 @@ class ARROW_EXPORT ConstantArrayGenerator {
return Float32(size);
case Type::DOUBLE:
return Float64(size);
case Type::STRING:
return String(size);
default:
return nullptr;
}
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/parquet/arrow/arrow_reader_writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2689,9 +2689,9 @@ void TryReadDataFile(const std::string& path,
s = arrow_reader->ReadTable(&table);
}

ASSERT_TRUE(s.code() == expected_code)
<< "Expected reading file to return "
<< arrow::Status(expected_code, "").CodeAsString() << ", but got " << s.ToString();
ASSERT_EQ(s.code(), expected_code)
<< "Expected reading file to return " << arrow::Status::CodeAsString(expected_code)
<< ", but got " << s.ToString();
}

TEST(TestArrowReaderAdHoc, Int96BadMemoryAccess) {
Expand Down

0 comments on commit 7546851

Please sign in to comment.