diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc
index 03e2051c2fb88..a4a1fa90c2878 100644
--- a/cpp/src/arrow/pretty_print.cc
+++ b/cpp/src/arrow/pretty_print.cc
@@ -151,14 +151,14 @@ class ArrayPrinter : public PrettyPrinter {
         IndentAfterNewline();
         (*sink_) << "...";
         if (!is_last && options_.skip_new_lines) {
-          (*sink_) << ",";
+          (*sink_) << options_.array_delimiters.element;
         }
         i = array.length() - window - 1;
       } else if (array.IsNull(i)) {
         IndentAfterNewline();
         (*sink_) << options_.null_rep;
         if (!is_last) {
-          (*sink_) << ",";
+          (*sink_) << options_.array_delimiters.element;
         }
       } else {
         if (indent_non_null_values) {
@@ -166,7 +166,7 @@ class ArrayPrinter : public PrettyPrinter {
         }
         RETURN_NOT_OK(func(i));
         if (!is_last) {
-          (*sink_) << ",";
+          (*sink_) << options_.array_delimiters.element;
         }
       }
       Newline();
@@ -453,12 +453,12 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
   if (!skip_new_lines) {
     *sink << "\n";
   }
-  bool skip_comma = true;
+  bool skip_element_delimiter = true;
   for (int i = 0; i < num_chunks; ++i) {
-    if (skip_comma) {
-      skip_comma = false;
+    if (skip_element_delimiter) {
+      skip_element_delimiter = false;
     } else {
-      (*sink) << ",";
+      (*sink) << options.chunked_array_delimiters.element;
       if (!skip_new_lines) {
         *sink << "\n";
       }
@@ -467,12 +467,13 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
       for (int i = 0; i < indent; ++i) {
         (*sink) << " ";
       }
-      (*sink) << "...,";
+      (*sink) << "...";
+      (*sink) << options.chunked_array_delimiters.element;
       if (!skip_new_lines) {
         *sink << "\n";
       }
       i = num_chunks - window - 1;
-      skip_comma = true;
+      skip_element_delimiter = true;
     } else {
       PrettyPrintOptions chunk_options = options;
       chunk_options.indent += options.indent_size;
diff --git a/cpp/src/arrow/pretty_print.h b/cpp/src/arrow/pretty_print.h
index 5d22fd5c51ab8..96a214c68b8a6 100644
--- a/cpp/src/arrow/pretty_print.h
+++ b/cpp/src/arrow/pretty_print.h
@@ -32,7 +32,21 @@ class Schema;
 class Status;
 class Table;
 
-struct PrettyPrintOptions {
+/// \class PrettyPrintDelimiters
+/// \brief Options for controlling which delimiters to use when printing
+/// an Array or ChunkedArray.
+struct ARROW_EXPORT PrettyPrintDelimiters {
+  /// Delimiter for separating individual elements of an Array (e.g. ","),
+  /// or individual chunks of a ChunkedArray
+  std::string element = ",";
+
+  /// Create a PrettyPrintDelimiters instance with default values
+  static PrettyPrintDelimiters Defaults() { return PrettyPrintDelimiters(); }
+};
+
+/// \class PrettyPrintOptions
+/// \brief Options for controlling how various Arrow types should be printed.
+struct ARROW_EXPORT PrettyPrintOptions {
   PrettyPrintOptions() = default;
 
   PrettyPrintOptions(int indent,  // NOLINT runtime/explicit
@@ -47,6 +61,7 @@ struct PrettyPrintOptions {
         skip_new_lines(skip_new_lines),
         truncate_metadata(truncate_metadata) {}
 
+  /// Create a PrettyPrintOptions instance with default values
   static PrettyPrintOptions Defaults() { return PrettyPrintOptions(); }
 
   /// Number of spaces to shift entire formatted object to the right
@@ -77,6 +92,12 @@ struct PrettyPrintOptions {
 
   /// If true, display schema metadata when pretty-printing a Schema
   bool show_schema_metadata = true;
+
+  /// Delimiters placed between the elements of a printed Array
+  PrettyPrintDelimiters array_delimiters = PrettyPrintDelimiters::Defaults();
+
+  /// Delimiters placed between the chunks of a printed ChunkedArray
+  PrettyPrintDelimiters chunked_array_delimiters = PrettyPrintDelimiters::Defaults();
 };
 
 /// \brief Print human-readable representation of RecordBatch
diff --git a/cpp/src/arrow/pretty_print_test.cc b/cpp/src/arrow/pretty_print_test.cc
index 9a6e347c0bdb2..45bb4ecffe054 100644
--- a/cpp/src/arrow/pretty_print_test.cc
+++ b/cpp/src/arrow/pretty_print_test.cc
@@ -200,6 +200,65 @@ TEST_F(TestPrettyPrint, PrimitiveTypeNoNewlines) {
   CheckPrimitive<Int32Type, int32_t>(options, is_valid, values, expected, false);
 }
 
+TEST_F(TestPrettyPrint, ArrayCustomElementDelimiter) {
+  PrettyPrintOptions options{};
+  // Use a custom array element delimiter of " | ",
+  // rather than the default delimiter (i.e. ",").
+  options.array_delimiters.element = " | ";
+
+  // Short array without ellipsis
+  {
+    std::vector<bool> is_valid = {true, true, false, true, false};
+    std::vector<int32_t> values = {1, 2, 3, 4, 5};
+    static const char* expected = R"expected([
+  1 | 
+  2 | 
+  null | 
+  4 | 
+  null
+])expected";
+    CheckPrimitive<Int32Type, int32_t>(options, is_valid, values, expected, false);
+  }
+
+  // Longer array with ellipsis
+  {
+    std::vector<bool> is_valid = {true, false, true};
+    std::vector<int32_t> values = {1, 2, 3};
+    // Append 20 copies of the value "10" to the end of the values vector.
+    values.insert(values.end(), 20, 10);
+    // Append 20 copies of the value "true" to the end of the validity bitmap vector.
+    is_valid.insert(is_valid.end(), 20, true);
+    // Append the values 4, 5, and 6 to the end of the values vector.
+    values.insert(values.end(), {4, 5, 6});
+    // Append the values true, false, and true to the end of the validity bitmap vector.
+    is_valid.insert(is_valid.end(), {true, false, true});
+    static const char* expected = R"expected([
+  1 | 
+  null | 
+  3 | 
+  10 | 
+  10 | 
+  10 | 
+  10 | 
+  10 | 
+  10 | 
+  10 | 
+  ...
+  10 | 
+  10 | 
+  10 | 
+  10 | 
+  10 | 
+  10 | 
+  10 | 
+  4 | 
+  null | 
+  6
+])expected";
+    CheckPrimitive<Int32Type, int32_t>(options, is_valid, values, expected, false);
+  }
+}
+
 TEST_F(TestPrettyPrint, Int8) {
   static const char* expected = R"expected([
   0,
@@ -1020,6 +1079,58 @@ TEST_F(TestPrettyPrint, ChunkedArrayPrimitiveType) {
   CheckStream(chunked_array_2, {0}, expected_2);
 }
 
+TEST_F(TestPrettyPrint, ChunkedArrayCustomElementDelimiter) {
+  PrettyPrintOptions options{};
+  // Use a custom ChunkedArray element delimiter of ";",
+  // rather than the default delimiter (i.e. ",").
+  options.chunked_array_delimiters.element = ";";
+  // Use a custom Array element delimiter of " | ",
+  // rather than the default delimiter (i.e. ",").
+  options.array_delimiters.element = " | ";
+
+  const auto chunk = ArrayFromJSON(int32(), "[1, 2, null, 4, null]");
+
+  // ChunkedArray with 1 chunk
+  {
+    const ChunkedArray chunked_array(chunk);
+
+    static const char* expected = R"expected([
+  [
+    1 | 
+    2 | 
+    null | 
+    4 | 
+    null
+  ]
+])expected";
+    CheckStream(chunked_array, options, expected);
+  }
+
+  // ChunkedArray with 2 chunks
+  {
+    const ChunkedArray chunked_array({chunk, chunk});
+
+    static const char* expected = R"expected([
+  [
+    1 | 
+    2 | 
+    null | 
+    4 | 
+    null
+  ];
+  [
+    1 | 
+    2 | 
+    null | 
+    4 | 
+    null
+  ]
+])expected";
+
+    CheckStream(chunked_array, options, expected);
+  }
+}
+
 TEST_F(TestPrettyPrint, TablePrimitive) {
   std::shared_ptr<Field> int_field = field("column", int32());
   auto array = ArrayFromJSON(int_field->type(), "[0, 1, null, 3, null]");