Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

ARROW-17642: [C++] Add ordered aggregation #14352

Closed
wants to merge 14 commits into from
5 changes: 5 additions & 0 deletions cpp/src/arrow/compare.cc
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,11 @@ class RangeDataEqualsImpl {
Status Visit(const StructType& type) {
const int32_t num_fields = type.num_fields();

if (left_.child_data.size() != static_cast<size_t>(num_fields) ||
right_.child_data.size() != static_cast<size_t>(num_fields)) {
result_ = false;
return Status::OK();
}
rtpsw marked this conversation as resolved.
Show resolved Hide resolved
auto compare_runs = [&](int64_t i, int64_t length) -> bool {
for (int32_t f = 0; f < num_fields; ++f) {
RangeDataEqualsImpl impl(options_, floating_approximate_, *left_.child_data[f],
Expand Down
39 changes: 26 additions & 13 deletions cpp/src/arrow/compute/exec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,22 @@ void PrintTo(const ExecBatch& batch, std::ostream* os) {

if (value.is_scalar()) {
*os << "Scalar[" << value.scalar()->ToString() << "]\n";
continue;
} else if (value.is_array() || value.is_chunked_array()) {
PrettyPrintOptions options;
options.skip_new_lines = true;
if (value.is_array()) {
auto array = value.make_array();
*os << "Array";
ARROW_CHECK_OK(PrettyPrint(*array, options, os));
} else {
auto array = value.chunked_array();
*os << "Chunked Array";
ARROW_CHECK_OK(PrettyPrint(*array, options, os));
}
*os << "\n";
} else {
ARROW_DCHECK(false);
}

auto array = value.make_array();
PrettyPrintOptions options;
options.skip_new_lines = true;
*os << "Array";
ARROW_CHECK_OK(PrettyPrint(*array, options, os));
*os << "\n";
}
}

Expand All @@ -118,19 +125,25 @@ std::string ExecBatch::ToString() const {
ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const {
  // Copy the batch, then slice each column in place. Scalars represent a
  // value broadcast across the whole batch, so they pass through untouched.
  ExecBatch sliced = *this;
  for (auto& column : sliced.values) {
    if (column.is_array()) {
      column = column.array()->Slice(offset, length);
    } else if (column.is_chunked_array()) {
      column = column.chunked_array()->Slice(offset, length);
    } else if (!column.is_scalar()) {
      // Datum should only ever hold a scalar, array, or chunked array here.
      ARROW_DCHECK(false);
    }
  }
  // Clamp the logical length so a slice near the end doesn't overrun.
  sliced.length = std::min(length, this->length - offset);
  return sliced;
}

Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values) {
if (values.empty()) {
Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values, int64_t length) {
if (values.empty() && length < 0) {
return Status::Invalid("Cannot infer ExecBatch length without at least one value");
}

int64_t length = -1;
for (const auto& value : values) {
if (value.is_scalar()) {
continue;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/exec.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ struct ARROW_EXPORT ExecBatch {

explicit ExecBatch(const RecordBatch& batch);

static Result<ExecBatch> Make(std::vector<Datum> values);
static Result<ExecBatch> Make(std::vector<Datum> values, int64_t length = -1);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At first glance, a reader might be confused about why you now need both ExecBatch::Make and ExecBatch::ExecBatch, since both take a vector of values and a length.

Looking closer it seems the Make function does the extra work of verifying that the length of the datums match the given length.

Could you add some comments explaining this for future readers?


Result<std::shared_ptr<RecordBatch>> ToRecordBatch(
std::shared_ptr<Schema> schema, MemoryPool* pool = default_memory_pool()) const;
Expand Down
Loading