apache · rtpsw · Oct 8, 2022 · Oct 10, 2022 · Oct 15, 2022 · Oct 15, 2022
diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
@@ -140,12 +140,11 @@ ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const {
   return out;
 }
 
-Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values) {
-  if (values.empty()) {
+Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values, int64_t length) {
+  if (values.empty() && length < 0) {
     return Status::Invalid("Cannot infer ExecBatch length without at least one value");
   }
 
-  int64_t length = -1;
   for (const auto& value : values) {
     if (value.is_scalar()) {
       continue;

diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h
@@ -180,7 +180,7 @@ struct ARROW_EXPORT ExecBatch {
 
   explicit ExecBatch(const RecordBatch& batch);
 
-  static Result<ExecBatch> Make(std::vector<Datum> values);
+  static Result<ExecBatch> Make(std::vector<Datum> values, int64_t length = -1);
 
   Result<std::shared_ptr<RecordBatch>> ToRecordBatch(
       std::shared_ptr<Schema> schema, MemoryPool* pool = default_memory_pool()) const;
@@ -233,6 +233,25 @@ struct ARROW_EXPORT ExecBatch {
 
   ExecBatch Slice(int64_t offset, int64_t length) const;
 
+  /// \brief Build a new batch from the values at the given positions.
+  ///
+  /// Values are copied in the order given by `ids` (an index may repeat);
+  /// the resulting batch is constructed with this batch's `length`.
+  ///
+  /// \param[in] ids positions into `values` to select
+  /// \return the selected batch, or Status::Invalid if any id is out of range
+  Result<ExecBatch> SelectValues(const std::vector<int>& ids) const {
+    std::vector<Datum> selected_values;
+    selected_values.reserve(ids.size());
+    for (int id : ids) {
+      // `id` is known non-negative before it is widened for the upper-bound check.
+      if (id < 0 || static_cast<size_t>(id) >= values.size()) {
+        return Status::Invalid("ExecBatch invalid value selection: ", id);
+      }
+      selected_values.push_back(values[id]);
+    }
+    return ExecBatch(std::move(selected_values), length);
+  }
+
   /// \brief A convenience for returning the types from the batch.
   std::vector<TypeHolder> GetTypes() const {
     std::vector<TypeHolder> result;