Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-37978: [C++] Add support for specifying custom Array element delimiter to arrow::PrettyPrintOptions #37981

Merged
merged 12 commits into from
Oct 5, 2023
Merged
19 changes: 10 additions & 9 deletions cpp/src/arrow/pretty_print.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,22 +151,22 @@ class ArrayPrinter : public PrettyPrinter {
IndentAfterNewline();
(*sink_) << "...";
if (!is_last && options_.skip_new_lines) {
(*sink_) << ",";
(*sink_) << options_.array_delimiters.element;
}
i = array.length() - window - 1;
} else if (array.IsNull(i)) {
IndentAfterNewline();
(*sink_) << options_.null_rep;
if (!is_last) {
(*sink_) << ",";
(*sink_) << options_.array_delimiters.element;
}
} else {
if (indent_non_null_values) {
IndentAfterNewline();
}
RETURN_NOT_OK(func(i));
if (!is_last) {
(*sink_) << ",";
(*sink_) << options_.array_delimiters.element;
}
}
Newline();
Expand Down Expand Up @@ -453,12 +453,12 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
if (!skip_new_lines) {
*sink << "\n";
}
bool skip_comma = true;
bool skip_element_delimiter = true;
for (int i = 0; i < num_chunks; ++i) {
if (skip_comma) {
skip_comma = false;
if (skip_element_delimiter) {
skip_element_delimiter = false;
} else {
(*sink) << ",";
(*sink) << options.chunked_array_delimiters.element;
if (!skip_new_lines) {
*sink << "\n";
}
Expand All @@ -467,12 +467,13 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
for (int i = 0; i < indent; ++i) {
(*sink) << " ";
}
(*sink) << "...,";
(*sink) << "...";
(*sink) << options.chunked_array_delimiters.element;
if (!skip_new_lines) {
*sink << "\n";
}
i = num_chunks - window - 1;
skip_comma = true;
skip_element_delimiter = true;
} else {
PrettyPrintOptions chunk_options = options;
chunk_options.indent += options.indent_size;
Expand Down
23 changes: 22 additions & 1 deletion cpp/src/arrow/pretty_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,21 @@ class Schema;
class Status;
class Table;

struct PrettyPrintOptions {
/// \class PrettyPrintDelimiters
/// \brief Delimiter strings used when pretty-printing an Array or a
/// ChunkedArray.
struct ARROW_EXPORT PrettyPrintDelimiters {
  /// String written between consecutive elements of an Array, or between
  /// consecutive chunks of a ChunkedArray. Defaults to ",".
  std::string element{","};

  /// Return a PrettyPrintDelimiters in which every member holds its default
  /// value.
  static PrettyPrintDelimiters Defaults() { return {}; }
};

/// \class PrettyPrintOptions
/// \brief Options for controlling how various Arrow types should be printed.
struct ARROW_EXPORT PrettyPrintOptions {
PrettyPrintOptions() = default;

PrettyPrintOptions(int indent, // NOLINT runtime/explicit
Expand All @@ -47,6 +61,7 @@ struct PrettyPrintOptions {
skip_new_lines(skip_new_lines),
truncate_metadata(truncate_metadata) {}

/// Create a PrettyPrintOptions instance with default values
static PrettyPrintOptions Defaults() { return PrettyPrintOptions(); }

/// Number of spaces to shift entire formatted object to the right
Expand Down Expand Up @@ -77,6 +92,12 @@ struct PrettyPrintOptions {

/// If true, display schema metadata when pretty-printing a Schema
bool show_schema_metadata = true;

/// Delimiters to use when printing an Array
PrettyPrintDelimiters array_delimiters = PrettyPrintDelimiters::Defaults();

/// Delimiters to use when printing a ChunkedArray
PrettyPrintDelimiters chunked_array_delimiters = PrettyPrintDelimiters::Defaults();
};

/// \brief Print human-readable representation of RecordBatch
Expand Down
111 changes: 111 additions & 0 deletions cpp/src/arrow/pretty_print_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,65 @@ TEST_F(TestPrettyPrint, PrimitiveTypeNoNewlines) {
CheckPrimitive<Int32Type, int32_t>(options, is_valid, values, expected, false);
}

// Exercises pretty-printing of an Int32 array after overriding
// PrettyPrintOptions::array_delimiters.element with " | ".
// Two cases are covered: a 5-element array whose expected text lists every
// element, and a 26-element array whose expected text contains a "..." line.
TEST_F(TestPrettyPrint, ArrayCustomElementDelimiter) {
PrettyPrintOptions options{};
// Use a custom array element delimiter of " | ",
// rather than the default delimiter (i.e. ",").
options.array_delimiters.element = " | ";

// Short array without ellipsis
{
std::vector<bool> is_valid = {true, true, false, true, false};
std::vector<int32_t> values = {1, 2, 3, 4, 5};
// Entries whose is_valid flag is false appear as "null" in the expected
// text; the last entry is not followed by a delimiter.
static const char* expected = R"expected([
1 |
2 |
null |
4 |
null
])expected";
CheckPrimitive<Int32Type, int32_t>(options, is_valid, values, expected, false);
}

// Longer array with ellipsis
{
std::vector<bool> is_valid = {true, false, true};
std::vector<int32_t> values = {1, 2, 3};
// Append 20 copies of the value "10" to the end of the values vector.
values.insert(values.end(), 20, 10);
// Append 20 copies of the value "true" to the end of the validity bitmap vector.
is_valid.insert(is_valid.end(), 20, true);
// Append the values 4, 5, and 6 to the end of the values vector.
values.insert(values.end(), {4, 5, 6});
// Append the values true, false, and true to the end of the validity bitmap vector.
is_valid.insert(is_valid.end(), {true, false, true});
// Both vectors now hold 3 + 20 + 3 = 26 entries. The expected text lists
// ten entries, a "..." line with no delimiter after it, then ten more
// entries.
static const char* expected = R"expected([
1 |
null |
3 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
...
10 |
10 |
10 |
10 |
10 |
10 |
10 |
4 |
null |
6
])expected";
CheckPrimitive<Int32Type, int32_t>(options, is_valid, values, expected, false);
}
}

TEST_F(TestPrettyPrint, Int8) {
static const char* expected = R"expected([
0,
Expand Down Expand Up @@ -1020,6 +1079,58 @@ TEST_F(TestPrettyPrint, ChunkedArrayPrimitiveType) {
CheckStream(chunked_array_2, {0}, expected_2);
}

// Exercises pretty-printing of a ChunkedArray when both delimiter options are
// overridden: chunked_array_delimiters.element (";") and
// array_delimiters.element (" | "). In the expected text, ";" appears between
// chunks and "|" appears between the values inside each chunk.
TEST_F(TestPrettyPrint, ChunkedArrayCustomElementDelimiter) {
PrettyPrintOptions options{};
// Use a custom ChunkedArray element delimiter of ";",
// rather than the default delimiter (i.e. ",").
options.chunked_array_delimiters.element = ";";
// Use a custom Array element delimiter of " | ",
// rather than the default delimiter (i.e. ",").
options.array_delimiters.element = " | ";

// The same 5-element chunk is reused by both cases below.
const auto chunk = ArrayFromJSON(int32(), "[1, 2, null, 4, null]");

// ChunkedArray with 1 chunk
{
const ChunkedArray chunked_array(chunk);

// With a single chunk, the ";" chunk delimiter does not appear in the
// expected text.
static const char* expected = R"expected([
[
1 |
2 |
null |
4 |
null
]
])expected";
CheckStream(chunked_array, options, expected);
}

// ChunkedArray with 2 chunks
{
const ChunkedArray chunked_array({chunk, chunk});

// The ";" appears exactly once, after the closing bracket of the first
// chunk; the second (last) chunk is not followed by a delimiter.
static const char* expected = R"expected([
[
1 |
2 |
null |
4 |
null
];
[
1 |
2 |
null |
4 |
null
]
])expected";

CheckStream(chunked_array, options, expected);
}
}

TEST_F(TestPrettyPrint, TablePrimitive) {
std::shared_ptr<Field> int_field = field("column", int32());
auto array = ArrayFromJSON(int_field->type(), "[0, 1, null, 3, null]");
Expand Down
Loading