From 6f4c9919019926f0f042efa91b9b112d11ebaa61 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 4 Nov 2021 16:48:14 +0100
Subject: [PATCH] ARROW-13130: [C++] Add decimal support to arithmetic kernels

This adds decimal support for the following kernels (and _checked variants where applicable): abs, acos, add/sub/mul/div, asin, atan, atan2, ceil, cos, floor, hash_stddev, hash_tdigest, hash_variance, is_finite, is_inf, is_nan, ln, log1p, log2, logb, mode, negate, power, quantile, round, round_to_multiple, sign, sin, stddev/variance, tan, tdigest, trunc

Most kernels cast decimals to double and proceed. Some, including rounding, directly operate on decimals. Aggregate kernels directly operate on decimals (and cast to double inline) since DispatchBest is not usable for the aggregate nodes (at least, unless we also reimplement implicit casting there).

Additionally, ValidateFull for scalars/arrays now checks FitsInPrecision. A number of tests were adjusted to account for this.

Closes #11313 from lidavidm/arrow-13130

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/aggregate_basic.cc  |   5 +-
 .../compute/kernels/aggregate_internal.h      |  21 ++
 .../arrow/compute/kernels/aggregate_mode.cc   |  95 +++--
 .../compute/kernels/aggregate_quantile.cc     |  50 ++-
 .../compute/kernels/aggregate_tdigest.cc      |  36 +-
 .../arrow/compute/kernels/aggregate_test.cc   | 193 +++++++++--
 .../compute/kernels/aggregate_var_std.cc      |  53 ++-
 .../arrow/compute/kernels/hash_aggregate.cc   | 303 ++++++++--------
 .../compute/kernels/hash_aggregate_test.cc    |  91 +++++
 .../compute/kernels/scalar_arithmetic.cc      | 124 +++++--
 .../compute/kernels/scalar_arithmetic_test.cc | 327 +++++++++++++++++-
 .../arrow/compute/kernels/scalar_validity.cc  |  39 +++
 .../compute/kernels/scalar_validity_test.cc   |  39 +++
 cpp/src/arrow/util/basic_decimal.cc           |  12 +
 cpp/src/arrow/util/basic_decimal.h            |   6 +
 docs/source/cpp/compute.rst                   | 162 +++++----
 16 files changed, 1197 insertions(+), 359 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 0bc839be8e3cc..53fa2d2d7f702 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -422,9 +422,8 @@ void AddMinOrMaxAggKernel(ScalarAggregateFunction* func,
   auto init = [min_max_func](
                   KernelContext* ctx,
                   const KernelInitArgs& args) -> Result<std::unique_ptr<KernelState>> {
-    std::vector<ValueDescr> inputs = args.inputs;
-    ARROW_ASSIGN_OR_RAISE(auto kernel, min_max_func->DispatchBest(&inputs));
-    KernelInitArgs new_args{kernel, inputs, args.options};
+    ARROW_ASSIGN_OR_RAISE(auto kernel, min_max_func->DispatchExact(args.inputs));
+    KernelInitArgs new_args{kernel, args.inputs, args.options};
     return kernel->init(ctx, new_args);
   };
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_internal.h b/cpp/src/arrow/compute/kernels/aggregate_internal.h
index 22a54558f4e8a..946ec01900c5b 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_internal.h
@@ -21,6 +21,7 @@
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_run_reader.h"
+#include "arrow/util/int128_internal.h"
 #include "arrow/util/logging.h"
 
 namespace arrow {
@@ -111,6 +112,26 @@ void AddAggKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
                   ScalarAggregateFinalize finalize, ScalarAggregateFunction* func,
                   SimdLevel::type simd_level = SimdLevel::NONE);
 
+using arrow::internal::VisitSetBitRunsVoid;
+
+template <typename T, typename Enable = void>
+struct GetSumType;
+
+template <typename T>
+struct GetSumType<T, enable_if_floating_point<T>> {
+  using SumType = double;
+};
+
+template <typename T>
+struct GetSumType<T, enable_if_integer<T>> {
+  using SumType = arrow::internal::int128_t;
+};
+
+template <typename T>
+struct GetSumType<T, enable_if_decimal<T>> {
+  using SumType = typename TypeTraits<T>::CType;
+};
+
 // SumArray must be parameterized with the SIMD level since it's called both from
 // translation units with and without vectorization. Normally it gets inlined but
 // if not, without the parameter, we'll have multiple definitions of the same
diff --git a/cpp/src/arrow/compute/kernels/aggregate_mode.cc b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
index f225f6bf569c3..6a50556a13efd 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_mode.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
@@ -40,10 +40,13 @@ constexpr char kCountFieldName[] = "count";
 
 constexpr uint64_t kCountEOF = ~0ULL;
 
-template <typename InType, typename CType = typename InType::c_type>
+template <typename InType, typename CType = typename TypeTraits<InType>::CType>
 Result<std::pair<CType*, int64_t*>> PrepareOutput(int64_t n, KernelContext* ctx,
                                                   Datum* out) {
-  const auto& mode_type = TypeTraits<InType>::type_singleton();
+  DCHECK_EQ(Type::STRUCT, out->type()->id());
+  const auto& out_type = checked_cast<const StructType&>(*out->type());
+  DCHECK_EQ(2, out_type.num_fields());
+  const auto& mode_type = out_type.field(0)->type();
   const auto& count_type = int64();
 
   auto mode_data = ArrayData::Make(mode_type, /*length=*/n, /*null_count=*/0);
@@ -61,10 +64,7 @@ Result<std::pair<CType*, int64_t*>> PrepareOutput(int64_t n, KernelContext* ctx,
     count_buffer = count_data->template GetMutableValues<int64_t>(1);
   }
 
-  const auto& out_type =
-      struct_({field(kModeFieldName, mode_type), field(kCountFieldName, count_type)});
-  *out = Datum(ArrayData::Make(out_type, n, {nullptr}, {mode_data, count_data}, 0));
-
+  *out = Datum(ArrayData::Make(out->type(), n, {nullptr}, {mode_data, count_data}, 0));
   return std::make_pair(mode_buffer, count_buffer);
 }
 
@@ -72,7 +72,7 @@ Result<std::pair<CType*, int64_t*>> PrepareOutput(int64_t n, KernelContext* ctx,
 // suboptimal for tiny or large n, possibly okay as we're not in hot path
 template <typename InType, typename Generator>
 Status Finalize(KernelContext* ctx, Datum* out, Generator&& gen) {
-  using CType = typename InType::c_type;
+  using CType = typename TypeTraits<InType>::CType;
 
   using ValueCountPair = std::pair<CType, uint64_t>;
   auto gt = [](const ValueCountPair& lhs, const ValueCountPair& rhs) {
@@ -203,13 +203,25 @@ struct CountModer<BooleanType> {
   }
 };
 
-// copy and sort approach for floating points or integers with wide value range
+// copy and sort approach for floating points, decimals, or integers with wide
+// value range
 // O(n) space, O(nlogn) time
 template <typename T>
 struct SortModer {
-  using CType = typename T::c_type;
+  using CType = typename TypeTraits<T>::CType;
   using Allocator = arrow::stl::allocator<CType>;
 
+  template <typename Type = T>
+  static enable_if_floating_point<Type, CType> GetNan() {
+    return static_cast<CType>(NAN);
+  }
+
+  template <typename Type = T>
+  static enable_if_t<!is_floating_type<Type>::value, CType> GetNan() {
+    DCHECK(false);
+    return static_cast<CType>(0);
+  }
+
   Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const Datum& datum = batch[0];
     const int64_t in_length = datum.length() - datum.null_count();
@@ -246,7 +258,7 @@ struct SortModer {
       if (ARROW_PREDICT_FALSE(it == in_buffer.cend())) {
         // handle NAN at last
         if (nan_count > 0) {
-          auto value_count = std::make_pair(static_cast<CType>(NAN), nan_count);
+          auto value_count = std::make_pair(GetNan(), nan_count);
           nan_count = 0;
           return value_count;
         }
@@ -318,13 +330,18 @@ struct Moder<InType, enable_if_t<(is_integer_type<InType>::value &&
 };
 
 template <typename InType>
-struct Moder<InType, enable_if_t<is_floating_type<InType>::value>> {
+struct Moder<InType, enable_if_floating_point<InType>> {
+  SortModer<InType> impl;
+};
+
+template <typename InType>
+struct Moder<InType, enable_if_decimal<InType>> {
   SortModer<InType> impl;
 };
 
 template <typename T>
 Status ScalarMode(KernelContext* ctx, const Scalar& scalar, Datum* out) {
-  using CType = typename T::c_type;
+  using CType = typename TypeTraits<T>::CType;
 
   const ModeOptions& options = ModeState::Get(ctx);
   if ((!options.skip_nulls && !scalar.is_valid) ||
@@ -366,30 +383,33 @@ struct ModeExecutor {
   }
 };
 
-VectorKernel NewModeKernel(const std::shared_ptr<DataType>& in_type) {
+Result<ValueDescr> ModeType(KernelContext*, const std::vector<ValueDescr>& descrs) {
+  return ValueDescr::Array(
+      struct_({field(kModeFieldName, descrs[0].type), field(kCountFieldName, int64())}));
+}
+
+VectorKernel NewModeKernel(const std::shared_ptr<DataType>& in_type,
+                           ArrayKernelExec exec) {
   VectorKernel kernel;
   kernel.init = ModeState::Init;
   kernel.can_execute_chunkwise = false;
   kernel.output_chunked = false;
-  auto out_type =
-      struct_({field(kModeFieldName, in_type), field(kCountFieldName, int64())});
-  kernel.signature =
-      KernelSignature::Make({InputType(in_type)}, ValueDescr::Array(out_type));
-  return kernel;
-}
-
-void AddBooleanModeKernel(VectorFunction* func) {
-  VectorKernel kernel = NewModeKernel(boolean());
-  kernel.exec = ModeExecutor<StructType, BooleanType>::Exec;
-  DCHECK_OK(func->AddKernel(kernel));
-}
-
-void AddNumericModeKernels(VectorFunction* func) {
-  for (const auto& type : NumericTypes()) {
-    VectorKernel kernel = NewModeKernel(type);
-    kernel.exec = GenerateNumeric<ModeExecutor, StructType>(*type);
-    DCHECK_OK(func->AddKernel(kernel));
+  switch (in_type->id()) {
+    case Type::DECIMAL128:
+    case Type::DECIMAL256:
+      kernel.signature =
+          KernelSignature::Make({InputType(in_type->id())}, OutputType(ModeType));
+      break;
+    default: {
+      auto out_type =
+          struct_({field(kModeFieldName, in_type), field(kCountFieldName, int64())});
+      kernel.signature = KernelSignature::Make({InputType(in_type->id())},
+                                               ValueDescr::Array(std::move(out_type)));
+      break;
+    }
   }
+  kernel.exec = std::move(exec);
+  return kernel;
 }
 
 const FunctionDoc mode_doc{
@@ -409,8 +429,17 @@ void RegisterScalarAggregateMode(FunctionRegistry* registry) {
   static auto default_options = ModeOptions::Defaults();
   auto func = std::make_shared<VectorFunction>("mode", Arity::Unary(), &mode_doc,
                                                &default_options);
-  AddBooleanModeKernel(func.get());
-  AddNumericModeKernels(func.get());
+  DCHECK_OK(func->AddKernel(
+      NewModeKernel(boolean(), ModeExecutor<StructType, BooleanType>::Exec)));
+  for (const auto& type : NumericTypes()) {
+    DCHECK_OK(func->AddKernel(
+        NewModeKernel(type, GenerateNumeric<ModeExecutor, StructType>(*type))));
+  }
+  // Precision and scale are ignored: decimal kernels are matched on type id only
+  DCHECK_OK(func->AddKernel(
+      NewModeKernel(decimal128(1, 0), ModeExecutor<StructType, Decimal128Type>::Exec)));
+  DCHECK_OK(func->AddKernel(
+      NewModeKernel(decimal256(1, 0), ModeExecutor<StructType, Decimal256Type>::Exec)));
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
index 62e375e695087..cd2410cc9eb75 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
@@ -71,10 +71,21 @@ uint64_t QuantileToDataPoint(size_t length, double q,
   return datapoint_index;
 }
 
+template <typename T>
+double DataPointToDouble(T value, const DataType&) {
+  return static_cast<double>(value);
+}
+double DataPointToDouble(const Decimal128& value, const DataType& ty) {
+  return value.ToDouble(checked_cast<const DecimalType&>(ty).scale());
+}
+double DataPointToDouble(const Decimal256& value, const DataType& ty) {
+  return value.ToDouble(checked_cast<const DecimalType&>(ty).scale());
+}
+
 // copy and nth_element approach, large memory footprint
 template <typename InType>
 struct SortQuantiler {
-  using CType = typename InType::c_type;
+  using CType = typename TypeTraits<InType>::CType;
   using Allocator = arrow::stl::allocator<CType>;
 
   Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
@@ -106,8 +117,7 @@ struct SortQuantiler {
     // prepare out array
     // out type depends on options
     const bool is_datapoint = IsDataPoint(options);
-    const std::shared_ptr<DataType> out_type =
-        is_datapoint ? TypeTraits<InType>::type_singleton() : float64();
+    const std::shared_ptr<DataType> out_type = is_datapoint ? datum.type() : float64();
     int64_t out_length = options.q.size();
     if (in_buffer.empty()) {
       return MakeArrayOfNull(out_type, out_length, ctx->memory_pool()).Value(out);
@@ -142,8 +152,9 @@ struct SortQuantiler {
         double* out_buffer = out_data->template GetMutableValues<double>(1);
         for (int64_t i = 0; i < out_length; ++i) {
           const int64_t q_index = q_indices[i];
-          out_buffer[q_index] = GetQuantileByInterp(
-              in_buffer, &last_index, options.q[q_index], options.interpolation);
+          out_buffer[q_index] =
+              GetQuantileByInterp(in_buffer, &last_index, options.q[q_index],
+                                  options.interpolation, *datum.type());
         }
       }
     }
@@ -170,8 +181,8 @@ struct SortQuantiler {
 
   // return quantile interpolated from adjacent input data points
   double GetQuantileByInterp(std::vector<CType, Allocator>& in, uint64_t* last_index,
-                             double q,
-                             enum QuantileOptions::Interpolation interpolation) {
+                             double q, enum QuantileOptions::Interpolation interpolation,
+                             const DataType& in_type) {
     const double index = (in.size() - 1) * q;
     const uint64_t lower_index = static_cast<uint64_t>(index);
     const double fraction = index - lower_index;
@@ -181,7 +192,7 @@ struct SortQuantiler {
       std::nth_element(in.begin(), in.begin() + lower_index, in.begin() + *last_index);
     }
 
-    const double lower_value = static_cast<double>(in[lower_index]);
+    const double lower_value = DataPointToDouble(in[lower_index], in_type);
     if (fraction == 0) {
       *last_index = lower_index;
       return lower_value;
@@ -197,7 +208,7 @@ struct SortQuantiler {
     }
     *last_index = lower_index;
 
-    const double higher_value = static_cast<double>(in[higher_index]);
+    const double higher_value = DataPointToDouble(in[higher_index], in_type);
 
     if (interpolation == QuantileOptions::LINEAR) {
       // more stable than naive linear interpolation
@@ -399,10 +410,15 @@ struct ExactQuantiler<InType, enable_if_t<is_floating_type<InType>::value>> {
   SortQuantiler<InType> impl;
 };
 
+template <typename InType>
+struct ExactQuantiler<InType, enable_if_t<is_decimal_type<InType>::value>> {
+  SortQuantiler<InType> impl;
+};
+
 template <typename T>
 Status ScalarQuantile(KernelContext* ctx, const QuantileOptions& options,
                       const Scalar& scalar, Datum* out) {
-  using CType = typename T::c_type;
+  using CType = typename TypeTraits<T>::CType;
   ArrayData* output = out->mutable_array();
   output->length = options.q.size();
   auto out_type = IsDataPoint(options) ? scalar.type : float64();
@@ -433,7 +449,7 @@ Status ScalarQuantile(KernelContext* ctx, const QuantileOptions& options,
   } else {
     double* out_buffer = output->template GetMutableValues<double>(1);
     for (int64_t i = 0; i < output->length; i++) {
-      out_buffer[i] = static_cast<double>(UnboxScalar<T>::Unbox(scalar));
+      out_buffer[i] = DataPointToDouble(UnboxScalar<T>::Unbox(scalar), *scalar.type);
     }
   }
   return Status::OK();
@@ -486,6 +502,18 @@ void AddQuantileKernels(VectorFunction* func) {
     base.exec = GenerateNumeric<QuantileExecutor, NullType>(*ty);
     DCHECK_OK(func->AddKernel(base));
   }
+  {
+    base.signature =
+        KernelSignature::Make({InputType(Type::DECIMAL128)}, OutputType(ResolveOutput));
+    base.exec = QuantileExecutor<NullType, Decimal128Type>::Exec;
+    DCHECK_OK(func->AddKernel(base));
+  }
+  {
+    base.signature =
+        KernelSignature::Make({InputType(Type::DECIMAL256)}, OutputType(ResolveOutput));
+    base.exec = QuantileExecutor<NullType, Decimal256Type>::Exec;
+    DCHECK_OK(func->AddKernel(base));
+  }
 }
 
 const FunctionDoc quantile_doc{
diff --git a/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc b/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
index 0fddf38f575c9..7c86267d94006 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
@@ -34,13 +34,25 @@ template <typename ArrowType>
 struct TDigestImpl : public ScalarAggregator {
   using ThisType = TDigestImpl<ArrowType>;
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
-  using CType = typename ArrowType::c_type;
+  using CType = typename TypeTraits<ArrowType>::CType;
 
-  explicit TDigestImpl(const TDigestOptions& options)
+  TDigestImpl(const TDigestOptions& options, const DataType& in_type)
       : options{options},
         tdigest{options.delta, options.buffer_size},
         count{0},
-        all_valid{true} {}
+        decimal_scale{0},
+        all_valid{true} {
+    if (is_decimal_type<ArrowType>::value) {
+      decimal_scale = checked_cast<const DecimalType&>(in_type).scale();
+    }
+  }
+
+  template <typename T>
+  double ToDouble(T value) const {
+    return static_cast<double>(value);
+  }
+  double ToDouble(const Decimal128& value) const { return value.ToDouble(decimal_scale); }
+  double ToDouble(const Decimal256& value) const { return value.ToDouble(decimal_scale); }
 
   Status Consume(KernelContext*, const ExecBatch& batch) override {
     if (!this->all_valid) return Status::OK();
@@ -57,7 +69,7 @@ struct TDigestImpl : public ScalarAggregator {
         VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
                             [&](int64_t pos, int64_t len) {
                               for (int64_t i = 0; i < len; ++i) {
-                                this->tdigest.NanAdd(values[pos + i]);
+                                this->tdigest.NanAdd(ToDouble(values[pos + i]));
                               }
                             });
       }
@@ -66,7 +78,7 @@ struct TDigestImpl : public ScalarAggregator {
       if (batch[0].scalar()->is_valid) {
         this->count += 1;
         for (int64_t i = 0; i < batch.length; i++) {
-          this->tdigest.NanAdd(value);
+          this->tdigest.NanAdd(ToDouble(value));
         }
       }
     }
@@ -110,6 +122,7 @@ struct TDigestImpl : public ScalarAggregator {
   const TDigestOptions options;
   TDigest tdigest;
   int64_t count;
+  int32_t decimal_scale;
   bool all_valid;
 };
 
@@ -132,8 +145,14 @@ struct TDigestInitState {
   }
 
   template <typename Type>
-  enable_if_t<is_number_type<Type>::value, Status> Visit(const Type&) {
-    state.reset(new TDigestImpl<Type>(options));
+  enable_if_number<Type, Status> Visit(const Type&) {
+    state.reset(new TDigestImpl<Type>(options, in_type));
+    return Status::OK();
+  }
+
+  template <typename Type>
+  enable_if_decimal<Type, Status> Visit(const Type&) {
+    state.reset(new TDigestImpl<Type>(options, in_type));
     return Status::OK();
   }
 
@@ -154,7 +173,7 @@ void AddTDigestKernels(KernelInit init,
                        const std::vector<std::shared_ptr<DataType>>& types,
                        ScalarAggregateFunction* func) {
   for (const auto& ty : types) {
-    auto sig = KernelSignature::Make({InputType(ty)}, float64());
+    auto sig = KernelSignature::Make({InputType(ty->id())}, float64());
     AddAggKernel(std::move(sig), init, func);
   }
 }
@@ -179,6 +198,7 @@ std::shared_ptr<ScalarAggregateFunction> AddTDigestAggKernels() {
   auto func = std::make_shared<ScalarAggregateFunction>(
       "tdigest", Arity::Unary(), &tdigest_doc, &default_tdigest_options);
   AddTDigestKernels(TDigestInit, NumericTypes(), func.get());
+  AddTDigestKernels(TDigestInit, {decimal128(1, 1), decimal256(1, 1)}, func.get());
   return func;
 }
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index c5355a8f4521f..c8b13862ae361 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -2334,6 +2334,26 @@ TEST(TestIndexKernel, Errors) {
 // Mode
 //
 
+template <typename CType>
+void CheckModes(const Datum& array, const ModeOptions options,
+                const std::vector<CType>& expected_modes,
+                const std::vector<int64_t>& expected_counts) {
+  ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, options));
+  ValidateOutput(out);
+  const StructArray out_array(out.array());
+  ASSERT_EQ(out_array.length(), expected_modes.size());
+  ASSERT_EQ(out_array.num_fields(), 2);
+
+  const CType* out_modes = out_array.field(0)->data()->GetValues<CType>(1);
+  const int64_t* out_counts = out_array.field(1)->data()->GetValues<int64_t>(1);
+  for (int i = 0; i < out_array.length(); ++i) {
+    // equal or nan equal
+    ASSERT_TRUE((expected_modes[i] == out_modes[i]) ||
+                (expected_modes[i] != expected_modes[i] && out_modes[i] != out_modes[i]));
+    ASSERT_EQ(expected_counts[i], out_counts[i]);
+  }
+}
+
 template <typename T>
 class TestPrimitiveModeKernel : public ::testing::Test {
  public:
@@ -2344,21 +2364,7 @@ class TestPrimitiveModeKernel : public ::testing::Test {
   void AssertModesAre(const Datum& array, const ModeOptions options,
                       const std::vector<CType>& expected_modes,
                       const std::vector<int64_t>& expected_counts) {
-    ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, options));
-    ValidateOutput(out);
-    const StructArray out_array(out.array());
-    ASSERT_EQ(out_array.length(), expected_modes.size());
-    ASSERT_EQ(out_array.num_fields(), 2);
-
-    const CType* out_modes = out_array.field(0)->data()->GetValues<CType>(1);
-    const int64_t* out_counts = out_array.field(1)->data()->GetValues<int64_t>(1);
-    for (int i = 0; i < out_array.length(); ++i) {
-      // equal or nan equal
-      ASSERT_TRUE(
-          (expected_modes[i] == out_modes[i]) ||
-          (expected_modes[i] != expected_modes[i] && out_modes[i] != out_modes[i]));
-      ASSERT_EQ(expected_counts[i], out_counts[i]);
-    }
+    CheckModes(array, options, expected_modes, expected_counts);
   }
 
   void AssertModesAre(const std::string& json, const int n,
@@ -2587,6 +2593,89 @@ TYPED_TEST(TestFloatingModeKernel, Floats) {
   this->AssertModesEmpty(ScalarFromJSON(in_ty, "null"), ModeOptions(/*n=*/1));
 }
 
+template <typename ArrowType>
+class TestDecimalModeKernel : public ::testing::Test {
+ public:
+  using CType = typename TypeTraits<ArrowType>::CType;
+
+  void AssertModesAre(const Datum& array, const ModeOptions options,
+                      const std::vector<std::string>& expected_modes,
+                      const std::vector<int64_t>& expected_counts) {
+    CheckModes<CType>(array, options, values(expected_modes), expected_counts);
+  }
+
+  CType value(const std::string& s) const {
+    EXPECT_OK_AND_ASSIGN(auto out, CType::FromString(s));
+    return out;
+  }
+
+  std::vector<CType> values(const std::vector<std::string>& strings) const {
+    std::vector<CType> values;
+    for (const auto& s : strings) {
+      values.push_back(value(s));
+    }
+    return values;
+  }
+
+  std::shared_ptr<DataType> type_instance() { return std::make_shared<ArrowType>(4, 2); }
+};
+
+TYPED_TEST_SUITE(TestDecimalModeKernel, DecimalArrowTypes);
+
+TYPED_TEST(TestDecimalModeKernel, Decimals) {
+  auto ty = this->type_instance();
+  this->AssertModesAre(ArrayFromJSON(ty, R"(["5.01", "-1.42", "-1.42", "5.01", "5.01"])"),
+                       ModeOptions(1), {"5.01"}, {3});
+  this->AssertModesAre(
+      ArrayFromJSON(ty, R"(["5.01", "-1.42", "-1.42", "5.01", "5.01", "-1.42"])"),
+      ModeOptions(1), {"-1.42"}, {3});
+  this->AssertModesAre(
+      ArrayFromJSON(ty, R"(["5.01", "-1.42", "-1.42", "5.01", "5.01", "-1.42"])"),
+      ModeOptions(2), {"-1.42", "5.01"}, {3, 3});
+
+  this->AssertModesAre(ArrayFromJSON(ty, "[]"), ModeOptions(1), {}, {});
+
+  this->AssertModesAre(ArrayFromJSON(ty, R"(["1.86", "-2.00", "-2.00", null])"),
+                       ModeOptions(/*n=*/1), {"-2.00"}, {2});
+  this->AssertModesAre(ArrayFromJSON(ty, R"(["1.86", "-2.00", "-2.00", null])"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/false), {}, {});
+  this->AssertModesAre(ArrayFromJSON(ty, R"(["1.86", "-2.00", "-2.00", null])"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/true, /*min_count=*/3),
+                       {"-2.00"}, {2});
+  this->AssertModesAre(ArrayFromJSON(ty, R"(["-2.00", "-2.00", null])"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/true, /*min_count=*/3), {},
+                       {});
+  this->AssertModesAre(ArrayFromJSON(ty, R"(["1.86", "-2.00", "-2.00"])"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/false, /*min_count=*/3),
+                       {"-2.00"}, {2});
+  this->AssertModesAre(ArrayFromJSON(ty, R"(["1.86", "-2.00", "-2.00", null])"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/false, /*min_count=*/3), {},
+                       {});
+  this->AssertModesAre(ArrayFromJSON(ty, R"(["1.86", "-2.00"])"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/false, /*min_count=*/3), {},
+                       {});
+
+  this->AssertModesAre(ScalarFromJSON(ty, R"("0.00")"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/false), {"0.00"}, {1});
+  this->AssertModesAre(ScalarFromJSON(ty, "null"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/false), {}, {});
+  this->AssertModesAre(ScalarFromJSON(ty, R"("0.00")"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/true, /*min_count=*/2), {},
+                       {});
+  this->AssertModesAre(ScalarFromJSON(ty, "null"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/true, /*min_count=*/2), {},
+                       {});
+  this->AssertModesAre(ScalarFromJSON(ty, R"("0.00")"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/false, /*min_count=*/2), {},
+                       {});
+  this->AssertModesAre(ScalarFromJSON(ty, "null"),
+                       ModeOptions(/*n=*/1, /*skip_nulls=*/false, /*min_count=*/2), {},
+                       {});
+  this->AssertModesAre(ScalarFromJSON(ty, R"("5.00")"), ModeOptions(/*n=*/1), {"5.00"},
+                       {1});
+  this->AssertModesAre(ScalarFromJSON(ty, "null"), ModeOptions(/*n=*/1), {}, {});
+}
+
 TEST_F(TestInt8ModeKernelValueRange, Basics) {
   this->AssertModeIs("[0, 127, -128, -128]", -128, 2);
   this->AssertModeIs("[127, 127, 127]", 127, 3);
@@ -2689,6 +2778,24 @@ TEST_F(TestInt32ModeKernel, Sliced) {
 // Variance/Stddev
 //
 
+void CheckVarStd(const Datum& array, const VarianceOptions& options,
+                 double expected_var) {
+  ASSERT_OK_AND_ASSIGN(Datum out_var, Variance(array, options));
+  ASSERT_OK_AND_ASSIGN(Datum out_std, Stddev(array, options));
+  auto var = checked_cast<const DoubleScalar*>(out_var.scalar().get());
+  auto std = checked_cast<const DoubleScalar*>(out_std.scalar().get());
+  ASSERT_TRUE(var->is_valid && std->is_valid);
+  // Near zero, the ULP-based ASSERT_DOUBLE_EQ comparison doesn't work as well
+  // (and MinGW can give results slightly off from zero), so use ASSERT_NEAR there
+  if (std::abs(expected_var) < 1e-20) {
+    ASSERT_NEAR(std->value * std->value, var->value, 1e-20);
+    ASSERT_NEAR(var->value, expected_var, 1e-20);
+  } else {
+    ASSERT_DOUBLE_EQ(std->value * std->value, var->value);
+    ASSERT_DOUBLE_EQ(var->value, expected_var);  // < 4ULP
+  }
+}
+
 template <typename ArrowType>
 class TestPrimitiveVarStdKernel : public ::testing::Test {
  public:
@@ -2697,12 +2804,12 @@ class TestPrimitiveVarStdKernel : public ::testing::Test {
 
   void AssertVarStdIs(const Array& array, const VarianceOptions& options,
                       double expected_var) {
-    AssertVarStdIsInternal(array, options, expected_var);
+    CheckVarStd(array, options, expected_var);
   }
 
   void AssertVarStdIs(const std::shared_ptr<ChunkedArray>& array,
                       const VarianceOptions& options, double expected_var) {
-    AssertVarStdIsInternal(array, options, expected_var);
+    CheckVarStd(array, options, expected_var);
   }
 
   void AssertVarStdIs(const std::string& json, const VarianceOptions& options,
@@ -2740,17 +2847,6 @@ class TestPrimitiveVarStdKernel : public ::testing::Test {
   std::shared_ptr<DataType> type_singleton() { return Traits::type_singleton(); }
 
  private:
-  void AssertVarStdIsInternal(const Datum& array, const VarianceOptions& options,
-                              double expected_var) {
-    ASSERT_OK_AND_ASSIGN(Datum out_var, Variance(array, options));
-    ASSERT_OK_AND_ASSIGN(Datum out_std, Stddev(array, options));
-    auto var = checked_cast<const ScalarType*>(out_var.scalar().get());
-    auto std = checked_cast<const ScalarType*>(out_std.scalar().get());
-    ASSERT_TRUE(var->is_valid && std->is_valid);
-    ASSERT_DOUBLE_EQ(std->value * std->value, var->value);
-    ASSERT_DOUBLE_EQ(var->value, expected_var);  // < 4ULP
-  }
-
   void AssertVarStdIsInvalidInternal(const Datum& array, const VarianceOptions& options) {
     ASSERT_OK_AND_ASSIGN(Datum out_var, Variance(array, options));
     ASSERT_OK_AND_ASSIGN(Datum out_std, Stddev(array, options));
@@ -3000,6 +3096,18 @@ TEST_F(TestVarStdKernelIntegerLength, Basics) {
 }
 #endif
 
+TEST(TestVarStdKernel, Decimal) {
+  // Decimals are effectively treated as doubles; sanity-check the results here
+  for (const auto& ty : {decimal128(3, 2), decimal256(3, 2)}) {
+    CheckVarStd(ArrayFromJSON(ty, R"(["1.00"])"), VarianceOptions(), 0);
+    CheckVarStd(ArrayFromJSON(ty, R"([null, "1.00", "2.00", "3.00"])"), VarianceOptions(),
+                0.6666666666666666);
+    CheckVarStd(ScalarFromJSON(ty, R"("1.00")"), VarianceOptions(), 0);
+    CheckVarStd(ArrayFromJSON(ty, R"([null, "1.00", "2.00"])"),
+                VarianceOptions(/*ddof=*/1), 0.5);
+  }
+}
+
 //
 // Quantile
 //
@@ -3541,6 +3649,24 @@ TEST(TestQuantileKernel, AllNullsOrNaNs) {
   }
 }
 
+TEST(TestQuantileKernel, Decimal) {
+  auto check = [](const std::shared_ptr<Array>& input, QuantileOptions options,
+                  const std::shared_ptr<Array>& expected) {
+    ASSERT_OK_AND_ASSIGN(Datum out, Quantile(input, options));
+    auto out_array = out.make_array();
+    ValidateOutput(*out_array);
+    AssertArraysEqual(*expected, *out_array, /*verbose=*/true);
+  };
+  for (const auto& ty : {decimal128(3, 2), decimal256(3, 2)}) {
+    check(ArrayFromJSON(ty, R"(["1.00", "5.00", null])"),
+          QuantileOptions(0.5, QuantileOptions::LINEAR),
+          ArrayFromJSON(float64(), R"([3.00])"));
+    check(ArrayFromJSON(ty, R"(["1.00", "2.00", "5.00"])"),
+          QuantileOptions(0.5, QuantileOptions::NEAREST),
+          ArrayFromJSON(ty, R"(["2.00"])"));
+  }
+}
+
 TEST(TestQuantileKernel, Scalar) {
   for (const auto& ty : {float64(), int64(), uint64()}) {
     QuantileOptions options(std::vector<double>{0.0, 0.5, 1.0});
@@ -3608,6 +3734,17 @@ TEST(TestTDigestKernel, AllNullsOrNaNs) {
   }
 }
 
+TEST(TestTDigestKernel, Decimal) {
+  // A digest of decimals must agree with the digest of the same values as doubles
+  for (const auto& type : {decimal128(3, 2), decimal256(3, 2)}) {
+    ASSERT_OK_AND_ASSIGN(Datum actual,
+                         TDigest(ArrayFromJSON(type, R"(["1.00", "2.00", "3.25"])")));
+    ASSERT_OK_AND_ASSIGN(Datum want,
+                         TDigest(ArrayFromJSON(float64(), "[1, 2, 3.25]")));
+    AssertArraysApproxEqual(*want.make_array(), *actual.make_array(), /*verbose=*/true);
+  }
+}
+
 TEST(TestTDigestKernel, Scalar) {
   for (const auto& ty : {float64(), int64(), uint64()}) {
     TDigestOptions options(std::vector<double>{0.0, 0.5, 1.0});
diff --git a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
index d0d3c514fae2e..feb98718aee3c 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
@@ -36,12 +36,21 @@ using arrow::internal::VisitSetBitRunsVoid;
 template <typename ArrowType>
 struct VarStdState {
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
-  using CType = typename ArrowType::c_type;
+  using CType = typename TypeTraits<ArrowType>::CType;
   using ThisType = VarStdState<ArrowType>;
 
-  explicit VarStdState(VarianceOptions options) : options(options) {}
+  explicit VarStdState(int32_t decimal_scale, VarianceOptions options)
+      : decimal_scale(decimal_scale), options(options) {}
 
-  // float/double/int64: calculate `m2` (sum((X-mean)^2)) with `two pass algorithm`
+  template <typename T>
+  double ToDouble(T value) const {  // non-decimals; decimals use the overloads below
+    return static_cast<double>(value);
+  }
+  double ToDouble(const Decimal128& value) const { return value.ToDouble(decimal_scale); }
+  double ToDouble(const Decimal256& value) const { return value.ToDouble(decimal_scale); }
+
+  // float/double/int64/decimal: calculate `m2` (sum((X-mean)^2)) with
+  // `two pass algorithm`
   // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
   template <typename T = ArrowType>
   enable_if_t<is_floating_type<T>::value || (sizeof(CType) > 4)> Consume(
@@ -52,14 +61,13 @@ struct VarStdState {
       return;
     }
 
-    using SumType =
-        typename std::conditional<is_floating_type<T>::value, double, int128_t>::type;
-    SumType sum = SumArray<CType, SumType, SimdLevel::NONE>(*array.data());
+    using SumType = typename internal::GetSumType<T>::SumType;
+    SumType sum = internal::SumArray<CType, SumType, SimdLevel::NONE>(*array.data());
 
-    const double mean = static_cast<double>(sum) / count;
-    const double m2 =
-        SumArray<CType, double, SimdLevel::NONE>(*array.data(), [mean](CType value) {
-          const double v = static_cast<double>(value);
+    const double mean = ToDouble(sum) / count;
+    const double m2 = internal::SumArray<CType, double, SimdLevel::NONE>(
+        *array.data(), [this, mean](CType value) {
+          const double v = ToDouble(value);
           return (v - mean) * (v - mean);
         });
 
@@ -102,7 +110,7 @@ struct VarStdState {
                             });
 
         // merge variance
-        ThisType state(options);
+        ThisType state(decimal_scale, options);
         state.count = var_std.count;
         state.mean = var_std.mean();
         state.m2 = var_std.m2();
@@ -116,7 +124,7 @@ struct VarStdState {
     this->m2 = 0;
     if (scalar.is_valid) {
       this->count = count;
-      this->mean = static_cast<double>(UnboxScalar<ArrowType>::Unbox(scalar));
+      this->mean = ToDouble(UnboxScalar<ArrowType>::Unbox(scalar));
     } else {
       this->count = 0;
       this->mean = 0;
@@ -141,6 +149,7 @@ struct VarStdState {
                 &this->mean, &this->m2);
   }
 
+  const int32_t decimal_scale;
   const VarianceOptions options;
   int64_t count = 0;
   double mean = 0;
@@ -153,9 +162,9 @@ struct VarStdImpl : public ScalarAggregator {
   using ThisType = VarStdImpl<ArrowType>;
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
 
-  explicit VarStdImpl(const std::shared_ptr<DataType>& out_type,
+  explicit VarStdImpl(int32_t decimal_scale, const std::shared_ptr<DataType>& out_type,
                       const VarianceOptions& options, VarOrStd return_type)
-      : out_type(out_type), state(options), return_type(return_type) {}
+      : out_type(out_type), state(decimal_scale, options), return_type(return_type) {}
 
   Status Consume(KernelContext*, const ExecBatch& batch) override {
     if (batch[0].is_array()) {
@@ -216,8 +225,16 @@ struct VarStdInitState {
   }
 
   template <typename Type>
-  enable_if_t<is_number_type<Type>::value, Status> Visit(const Type&) {
-    state.reset(new VarStdImpl<Type>(out_type, options, return_type));
+  enable_if_number<Type, Status> Visit(const Type&) {
+    state.reset(
+        new VarStdImpl<Type>(/*decimal_scale=*/0, out_type, options, return_type));
+    return Status::OK();
+  }
+
+  template <typename Type>
+  enable_if_decimal<Type, Status> Visit(const Type&) {
+    state.reset(new VarStdImpl<Type>(checked_cast<const DecimalType&>(in_type).scale(),
+                                     out_type, options, return_type));
     return Status::OK();
   }
 
@@ -247,7 +264,7 @@ void AddVarStdKernels(KernelInit init,
                       const std::vector<std::shared_ptr<DataType>>& types,
                       ScalarAggregateFunction* func) {
   for (const auto& ty : types) {
-    auto sig = KernelSignature::Make({InputType(ty)}, float64());
+    auto sig = KernelSignature::Make({InputType(ty->id())}, float64());
     AddAggKernel(std::move(sig), init, func);
   }
 }
@@ -275,6 +292,7 @@ std::shared_ptr<ScalarAggregateFunction> AddStddevAggKernels() {
   auto func = std::make_shared<ScalarAggregateFunction>(
       "stddev", Arity::Unary(), &stddev_doc, &default_std_options);
   AddVarStdKernels(StddevInit, NumericTypes(), func.get());
+  AddVarStdKernels(StddevInit, {decimal128(1, 1), decimal256(1, 1)}, func.get());
   return func;
 }
 
@@ -283,6 +301,7 @@ std::shared_ptr<ScalarAggregateFunction> AddVarianceAggKernels() {
   auto func = std::make_shared<ScalarAggregateFunction>(
       "variance", Arity::Unary(), &variance_doc, &default_var_options);
   AddVarStdKernels(VarianceInit, NumericTypes(), func.get());
+  AddVarStdKernels(VarianceInit, {decimal128(1, 1), decimal256(1, 1)}, func.get());
   return func;
 }
 
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 73c8f9d26c0e0..9f53267535511 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -519,7 +519,8 @@ struct GrouperFastImpl : Grouper {
 /// Implementations should be default constructible and perform initialization in
 /// Init().
 struct GroupedAggregator : KernelState {
-  virtual Status Init(ExecContext*, const FunctionOptions*) = 0;
+  virtual Status Init(ExecContext*, const std::vector<ValueDescr>& inputs,
+                      const FunctionOptions*) = 0;
 
   virtual Status Resize(int64_t new_num_groups) = 0;
 
@@ -536,7 +537,7 @@ template <typename Impl>
 Result<std::unique_ptr<KernelState>> HashAggregateInit(KernelContext* ctx,
                                                        const KernelInitArgs& args) {
   auto impl = ::arrow::internal::make_unique<Impl>();
-  RETURN_NOT_OK(impl->Init(ctx->exec_context(), args.options));
+  RETURN_NOT_OK(impl->Init(ctx->exec_context(), args.inputs, args.options));
   return std::move(impl);
 }
 
@@ -636,7 +637,8 @@ void VisitGroupedValuesNonNull(const ExecBatch& batch, ConsumeValue&& valid_func
 // Count implementation
 
 struct GroupedCountImpl : public GroupedAggregator {
-  Status Init(ExecContext* ctx, const FunctionOptions* options) override {
+  Status Init(ExecContext* ctx, const std::vector<ValueDescr>&,
+              const FunctionOptions* options) override {
     options_ = checked_cast<const CountOptions&>(*options);
     counts_ = BufferBuilder(ctx->memory_pool());
     return Status::OK();
@@ -725,13 +727,14 @@ struct GroupedReducingAggregator : public GroupedAggregator {
   using CType = typename TypeTraits<AccType>::CType;
   using InputCType = typename TypeTraits<Type>::CType;
 
-  Status Init(ExecContext* ctx, const FunctionOptions* options) override {
+  Status Init(ExecContext* ctx, const std::vector<ValueDescr>& inputs,
+              const FunctionOptions* options) override {
     pool_ = ctx->memory_pool();
     options_ = checked_cast<const ScalarAggregateOptions&>(*options);
     reduced_ = TypedBufferBuilder<CType>(pool_);
     counts_ = TypedBufferBuilder<int64_t>(pool_);
     no_nulls_ = TypedBufferBuilder<bool>(pool_);
-    // out_type_ initialized by SumInit
+    out_type_ = GetOutType(inputs[0].type);
     return Status::OK();
   }
 
@@ -829,6 +832,18 @@ struct GroupedReducingAggregator : public GroupedAggregator {
 
   std::shared_ptr<DataType> out_type() const override { return out_type_; }
 
+  template <typename T = Type>  // Non-decimal inputs: output is the accumulator type
+  static enable_if_t<!is_decimal_type<T>::value, std::shared_ptr<DataType>> GetOutType(
+      const std::shared_ptr<DataType>& in_type) {
+    return TypeTraits<AccType>::type_singleton();
+  }
+  // Decimal inputs: the output type is the input type itself
+  template <typename T = Type>
+  static enable_if_decimal<T, std::shared_ptr<DataType>> GetOutType(
+      const std::shared_ptr<DataType>& in_type) {
+    return in_type;
+  }
+
   int64_t num_groups_ = 0;
   ScalarAggregateOptions options_;
   TypedBufferBuilder<CType> reduced_;
@@ -838,76 +853,35 @@ struct GroupedReducingAggregator : public GroupedAggregator {
   MemoryPool* pool_;
 };
 
-// ----------------------------------------------------------------------
-// Sum implementation
-
-template <typename Type>
-struct GroupedSumImpl : public GroupedReducingAggregator<Type, GroupedSumImpl<Type>> {
-  using Base = GroupedReducingAggregator<Type, GroupedSumImpl<Type>>;
-  using CType = typename Base::CType;
-  using InputCType = typename Base::InputCType;
-
-  // Default value for a group
-  static CType NullValue(const DataType&) { return CType(0); }
-
-  template <typename T = Type>
-  static enable_if_number<T, CType> Reduce(const DataType&, const CType u,
-                                           const InputCType v) {
-    return static_cast<CType>(to_unsigned(u) + to_unsigned(static_cast<CType>(v)));
-  }
-
-  static CType Reduce(const DataType&, const CType u, const CType v) {
-    return static_cast<CType>(to_unsigned(u) + to_unsigned(v));
-  }
-
-  using Base::Finish;
-};
-
-template <template <typename T> class Impl, typename T>
-Result<std::unique_ptr<KernelState>> SumInit(KernelContext* ctx,
-                                             const KernelInitArgs& args) {
-  ARROW_ASSIGN_OR_RAISE(auto impl, HashAggregateInit<Impl<T>>(ctx, args));
-  static_cast<Impl<T>*>(impl.get())->out_type_ =
-      TypeTraits<typename Impl<T>::AccType>::type_singleton();
-  return std::move(impl);
-}
-
-template <typename Impl>
-Result<std::unique_ptr<KernelState>> DecimalSumInit(KernelContext* ctx,
-                                                    const KernelInitArgs& args) {
-  ARROW_ASSIGN_OR_RAISE(auto impl, HashAggregateInit<Impl>(ctx, args));
-  static_cast<Impl*>(impl.get())->out_type_ = args.inputs[0].type;
-  return std::move(impl);
-}
-
-struct GroupedSumFactory {
+template <template <typename> class Impl, const char* kFriendlyName>
+struct GroupedReducingFactory {
   template <typename T, typename AccType = typename FindAccumulatorType<T>::Type>
   Status Visit(const T&) {
-    kernel = MakeKernel(std::move(argument_type), SumInit<GroupedSumImpl, T>);
+    kernel = MakeKernel(std::move(argument_type), HashAggregateInit<Impl<T>>);
     return Status::OK();
   }
 
   Status Visit(const Decimal128Type&) {
-    kernel = MakeKernel(std::move(argument_type),
-                        DecimalSumInit<GroupedSumImpl<Decimal128Type>>);
+    kernel =
+        MakeKernel(std::move(argument_type), HashAggregateInit<Impl<Decimal128Type>>);
     return Status::OK();
   }
   Status Visit(const Decimal256Type&) {
-    kernel = MakeKernel(std::move(argument_type),
-                        DecimalSumInit<GroupedSumImpl<Decimal256Type>>);
+    kernel =
+        MakeKernel(std::move(argument_type), HashAggregateInit<Impl<Decimal256Type>>);
     return Status::OK();
   }
 
   Status Visit(const HalfFloatType& type) {
-    return Status::NotImplemented("Summing data of type ", type);
+    return Status::NotImplemented("Computing ", kFriendlyName, " of type ", type);
   }
 
   Status Visit(const DataType& type) {
-    return Status::NotImplemented("Summing data of type ", type);
+    return Status::NotImplemented("Computing ", kFriendlyName, " of type ", type);
   }
 
   static Result<HashAggregateKernel> Make(const std::shared_ptr<DataType>& type) {
-    GroupedSumFactory factory;
+    GroupedReducingFactory<Impl, kFriendlyName> factory;
     factory.argument_type = InputType::Array(type->id());
     RETURN_NOT_OK(VisitTypeInline(*type, &factory));
     return std::move(factory.kernel);
@@ -917,6 +891,34 @@ struct GroupedSumFactory {
   InputType argument_type;
 };
 
+// ----------------------------------------------------------------------
+// Sum implementation
+
+template <typename Type>
+struct GroupedSumImpl : public GroupedReducingAggregator<Type, GroupedSumImpl<Type>> {
+  using Base = GroupedReducingAggregator<Type, GroupedSumImpl<Type>>;
+  using CType = typename Base::CType;
+  using InputCType = typename Base::InputCType;
+
+  // Default value for a group: zero, the additive identity
+  static CType NullValue(const DataType&) { return CType(0); }
+  // Numeric inputs: convert `v` to the accumulator type, then add it to `u`
+  template <typename T = Type>
+  static enable_if_number<T, CType> Reduce(const DataType&, const CType u,
+                                           const InputCType v) {
+    return static_cast<CType>(to_unsigned(u) + to_unsigned(static_cast<CType>(v)));
+  }
+  // Operands already of the accumulator type: add them directly
+  static CType Reduce(const DataType&, const CType u, const CType v) {
+    return static_cast<CType>(to_unsigned(u) + to_unsigned(v));
+  }
+
+  using Base::Finish;
+};
+
+static constexpr const char kSumName[] = "sum";
+using GroupedSumFactory = GroupedReducingFactory<GroupedSumImpl, kSumName>;
+
 // ----------------------------------------------------------------------
 // Product implementation
 
@@ -945,43 +947,8 @@ struct GroupedProductImpl final
   using Base::Finish;
 };
 
-struct GroupedProductFactory {
-  template <typename T, typename AccType = typename FindAccumulatorType<T>::Type>
-  Status Visit(const T&) {
-    kernel = MakeKernel(std::move(argument_type), SumInit<GroupedProductImpl, T>);
-    return Status::OK();
-  }
-
-  Status Visit(const Decimal128Type&) {
-    kernel = MakeKernel(std::move(argument_type),
-                        DecimalSumInit<GroupedProductImpl<Decimal128Type>>);
-    return Status::OK();
-  }
-
-  Status Visit(const Decimal256Type&) {
-    kernel = MakeKernel(std::move(argument_type),
-                        DecimalSumInit<GroupedProductImpl<Decimal256Type>>);
-    return Status::OK();
-  }
-
-  Status Visit(const HalfFloatType& type) {
-    return Status::NotImplemented("Taking product of data of type ", type);
-  }
-
-  Status Visit(const DataType& type) {
-    return Status::NotImplemented("Taking product of data of type ", type);
-  }
-
-  static Result<HashAggregateKernel> Make(const std::shared_ptr<DataType>& type) {
-    GroupedProductFactory factory;
-    factory.argument_type = InputType::Array(type->id());
-    RETURN_NOT_OK(VisitTypeInline(*type, &factory));
-    return std::move(factory.kernel);
-  }
-
-  HashAggregateKernel kernel;
-  InputType argument_type;
-};
+static constexpr const char kProductName[] = "product";
+using GroupedProductFactory = GroupedReducingFactory<GroupedProductImpl, kProductName>;
 
 // ----------------------------------------------------------------------
 // Mean implementation
@@ -1040,43 +1007,8 @@ struct GroupedMeanImpl : public GroupedReducingAggregator<Type, GroupedMeanImpl<
   }
 };
 
-struct GroupedMeanFactory {
-  template <typename T, typename AccType = typename FindAccumulatorType<T>::Type>
-  Status Visit(const T&) {
-    kernel = MakeKernel(std::move(argument_type), SumInit<GroupedMeanImpl, T>);
-    return Status::OK();
-  }
-
-  Status Visit(const Decimal128Type&) {
-    kernel = MakeKernel(std::move(argument_type),
-                        DecimalSumInit<GroupedMeanImpl<Decimal128Type>>);
-    return Status::OK();
-  }
-
-  Status Visit(const Decimal256Type&) {
-    kernel = MakeKernel(std::move(argument_type),
-                        DecimalSumInit<GroupedMeanImpl<Decimal256Type>>);
-    return Status::OK();
-  }
-
-  Status Visit(const HalfFloatType& type) {
-    return Status::NotImplemented("Computing mean of type ", type);
-  }
-
-  Status Visit(const DataType& type) {
-    return Status::NotImplemented("Computing mean of type ", type);
-  }
-
-  static Result<HashAggregateKernel> Make(const std::shared_ptr<DataType>& type) {
-    GroupedMeanFactory factory;
-    factory.argument_type = InputType::Array(type->id());
-    RETURN_NOT_OK(VisitTypeInline(*type, &factory));
-    return std::move(factory.kernel);
-  }
-
-  HashAggregateKernel kernel;
-  InputType argument_type;
-};
+static constexpr const char kMeanName[] = "mean";
+using GroupedMeanFactory = GroupedReducingFactory<GroupedMeanImpl, kMeanName>;
 
 // Variance/Stdev implementation
 
@@ -1084,10 +1016,22 @@ using arrow::internal::int128_t;
 
 template <typename Type>
 struct GroupedVarStdImpl : public GroupedAggregator {
-  using CType = typename Type::c_type;
+  using CType = typename TypeTraits<Type>::CType;
+
+  Status Init(ExecContext* ctx, const std::vector<ValueDescr>& inputs,
+              const FunctionOptions* options) override {
+    // Only decimals carry a scale; InitInternal stores both the scale and the options
+    int32_t decimal_scale = 0;
+    if (is_decimal_type<Type>::value) {
+      decimal_scale = checked_cast<const DecimalType&>(*inputs[0].type).scale();
+    }
+    return InitInternal(ctx, decimal_scale, options);
+  }
 
-  Status Init(ExecContext* ctx, const FunctionOptions* options) override {
+  Status InitInternal(ExecContext* ctx, int32_t decimal_scale,
+                      const FunctionOptions* options) {
     options_ = *checked_cast<const VarianceOptions*>(options);
+    decimal_scale_ = decimal_scale;
     ctx_ = ctx;
     pool_ = ctx->memory_pool();
     counts_ = TypedBufferBuilder<int64_t>(pool_);
@@ -1107,18 +1051,28 @@ struct GroupedVarStdImpl : public GroupedAggregator {
     return Status::OK();
   }
 
+  template <typename T>
+  double ToDouble(T value) const {  // non-decimal values: plain numeric conversion
+    return static_cast<double>(value);
+  }
+  double ToDouble(const Decimal128& value) const {  // convert using the stored scale
+    return value.ToDouble(decimal_scale_);
+  }
+  double ToDouble(const Decimal256& value) const {  // convert using the stored scale
+    return value.ToDouble(decimal_scale_);
+  }
+
   Status Consume(const ExecBatch& batch) override { return ConsumeImpl(batch); }
 
-  // float/double/int64: calculate `m2` (sum((X-mean)^2)) with `two pass algorithm`
-  // (see aggregate_var_std.cc)
+  // float/double/int64/decimal: calculate `m2` (sum((X-mean)^2)) with
+  // `two pass algorithm` (see aggregate_var_std.cc)
   template <typename T = Type>
   enable_if_t<is_floating_type<T>::value || (sizeof(CType) > 4), Status> ConsumeImpl(
       const ExecBatch& batch) {
-    using SumType =
-        typename std::conditional<is_floating_type<T>::value, double, int128_t>::type;
+    using SumType = typename internal::GetSumType<T>::SumType;
 
     GroupedVarStdImpl<Type> state;
-    RETURN_NOT_OK(state.Init(ctx_, &options_));
+    RETURN_NOT_OK(state.InitInternal(ctx_, decimal_scale_, &options_));
     RETURN_NOT_OK(state.Resize(num_groups_));
     int64_t* counts = state.counts_.mutable_data();
     double* means = state.means_.mutable_data();
@@ -1137,12 +1091,12 @@ struct GroupedVarStdImpl : public GroupedAggregator {
         [&](uint32_t g) { BitUtil::ClearBit(no_nulls, g); });
 
     for (int64_t i = 0; i < num_groups_; i++) {
-      means[i] = static_cast<double>(sums[i]) / counts[i];
+      means[i] = ToDouble(sums[i]) / counts[i];
     }
 
     VisitGroupedValuesNonNull<Type>(
         batch, [&](uint32_t g, typename TypeTraits<Type>::CType value) {
-          const double v = static_cast<double>(value);
+          const double v = ToDouble(value);
           m2s[g] += (v - means[g]) * (v - means[g]);
         });
 
@@ -1192,7 +1146,7 @@ struct GroupedVarStdImpl : public GroupedAggregator {
       var_std.clear();
       var_std.resize(num_groups_);
       GroupedVarStdImpl<Type> state;
-      RETURN_NOT_OK(state.Init(ctx_, &options_));
+      RETURN_NOT_OK(state.InitInternal(ctx_, decimal_scale_, &options_));
       RETURN_NOT_OK(state.Resize(num_groups_));
       int64_t* other_counts = state.counts_.mutable_data();
       double* other_means = state.means_.mutable_data();
@@ -1319,6 +1273,7 @@ struct GroupedVarStdImpl : public GroupedAggregator {
   std::shared_ptr<DataType> out_type() const override { return float64(); }
 
   VarOrStd result_type_;
+  int32_t decimal_scale_;
   VarianceOptions options_;
   int64_t num_groups_ = 0;
   // m2 = count * s2 = sum((X-mean)^2)
@@ -1334,14 +1289,15 @@ Result<std::unique_ptr<KernelState>> VarStdInit(KernelContext* ctx,
                                                 const KernelInitArgs& args) {
   auto impl = ::arrow::internal::make_unique<GroupedVarStdImpl<T>>();
   impl->result_type_ = result_type;
-  RETURN_NOT_OK(impl->Init(ctx->exec_context(), args.options));
+  RETURN_NOT_OK(impl->Init(ctx->exec_context(), args.inputs, args.options));
   return std::move(impl);
 }
 
 template <VarOrStd result_type>
 struct GroupedVarStdFactory {
   template <typename T, typename Enable = enable_if_t<is_integer_type<T>::value ||
-                                                      is_floating_type<T>::value>>
+                                                      is_floating_type<T>::value ||
+                                                      is_decimal_type<T>::value>>
   Status Visit(const T&) {
     kernel = MakeKernel(std::move(argument_type), VarStdInit<T, result_type>);
     return Status::OK();
@@ -1357,7 +1313,7 @@ struct GroupedVarStdFactory {
 
   static Result<HashAggregateKernel> Make(const std::shared_ptr<DataType>& type) {
     GroupedVarStdFactory factory;
-    factory.argument_type = InputType::Array(type);
+    factory.argument_type = InputType::Array(type->id());
     RETURN_NOT_OK(VisitTypeInline(*type, &factory));
     return std::move(factory.kernel);
   }
@@ -1373,10 +1329,16 @@ using arrow::internal::TDigest;
 
 template <typename Type>
 struct GroupedTDigestImpl : public GroupedAggregator {
-  using CType = typename Type::c_type;
+  using CType = typename TypeTraits<Type>::CType;
 
-  Status Init(ExecContext* ctx, const FunctionOptions* options) override {
+  Status Init(ExecContext* ctx, const std::vector<ValueDescr>& inputs,
+              const FunctionOptions* options) override {
     options_ = *checked_cast<const TDigestOptions*>(options);
+    if (is_decimal_type<Type>::value) {
+      decimal_scale_ = checked_cast<const DecimalType&>(*inputs[0].type).scale();
+    } else {
+      decimal_scale_ = 0;
+    }
     ctx_ = ctx;
     pool_ = ctx->memory_pool();
     counts_ = TypedBufferBuilder<int64_t>(pool_);
@@ -1395,13 +1357,24 @@ struct GroupedTDigestImpl : public GroupedAggregator {
     return Status::OK();
   }
 
+  template <typename T>
+  double ToDouble(T value) const {  // non-decimal values: plain numeric conversion
+    return static_cast<double>(value);
+  }
+  double ToDouble(const Decimal128& value) const {  // convert using the stored scale
+    return value.ToDouble(decimal_scale_);
+  }
+  double ToDouble(const Decimal256& value) const {  // convert using the stored scale
+    return value.ToDouble(decimal_scale_);
+  }
+
   Status Consume(const ExecBatch& batch) override {
     int64_t* counts = counts_.mutable_data();
     uint8_t* no_nulls = no_nulls_.mutable_data();
     VisitGroupedValues<Type>(
         batch,
         [&](uint32_t g, CType value) {
-          tdigests_[g].NanAdd(value);
+          tdigests_[g].NanAdd(ToDouble(value));
           counts[g]++;
         },
         [&](uint32_t g) { BitUtil::SetBitTo(no_nulls, g, false); });
@@ -1470,6 +1443,7 @@ struct GroupedTDigestImpl : public GroupedAggregator {
   }
 
   TDigestOptions options_;
+  int32_t decimal_scale_;
   std::vector<TDigest> tdigests_;
   TypedBufferBuilder<int64_t> counts_;
   TypedBufferBuilder<bool> no_nulls_;
@@ -1485,6 +1459,13 @@ struct GroupedTDigestFactory {
     return Status::OK();
   }
 
+  template <typename T>
+  enable_if_decimal<T, Status> Visit(const T&) {  // overload selected for decimal types
+    kernel =
+        MakeKernel(std::move(argument_type), HashAggregateInit<GroupedTDigestImpl<T>>);
+    return Status::OK();
+  }
+
   Status Visit(const HalfFloatType& type) {
     return Status::NotImplemented("Computing t-digest of data of type ", type);
   }
@@ -1495,7 +1476,7 @@ struct GroupedTDigestFactory {
 
   static Result<HashAggregateKernel> Make(const std::shared_ptr<DataType>& type) {
     GroupedTDigestFactory factory;
-    factory.argument_type = InputType::Array(type);
+    factory.argument_type = InputType::Array(type->id());
     RETURN_NOT_OK(VisitTypeInline(*type, &factory));
     return std::move(factory.kernel);
   }
@@ -1509,15 +1490,14 @@ HashAggregateKernel MakeApproximateMedianKernel(HashAggregateFunction* tdigest_f
   kernel.init = [tdigest_func](
                     KernelContext* ctx,
                     const KernelInitArgs& args) -> Result<std::unique_ptr<KernelState>> {
-    std::vector<ValueDescr> inputs = args.inputs;
-    ARROW_ASSIGN_OR_RAISE(auto kernel, tdigest_func->DispatchBest(&inputs));
+    ARROW_ASSIGN_OR_RAISE(auto kernel, tdigest_func->DispatchExact(args.inputs));
     const auto& scalar_options =
         checked_cast<const ScalarAggregateOptions&>(*args.options);
     TDigestOptions options;
     // Default q = 0.5
     options.min_count = scalar_options.min_count;
     options.skip_nulls = scalar_options.skip_nulls;
-    KernelInitArgs new_args{kernel, inputs, &options};
+    KernelInitArgs new_args{kernel, args.inputs, &options};
     return kernel->init(ctx, new_args);
   };
   kernel.signature =
@@ -1581,7 +1561,8 @@ struct GroupedMinMaxImpl final : public GroupedAggregator {
   using ArrType =
       typename std::conditional<is_boolean_type<Type>::value, uint8_t, CType>::type;
 
-  Status Init(ExecContext* ctx, const FunctionOptions* options) override {
+  Status Init(ExecContext* ctx, const std::vector<ValueDescr>&,
+              const FunctionOptions* options) override {
     options_ = *checked_cast<const ScalarAggregateOptions*>(options);
     // type_ initialized by MinMaxInit
     mins_ = TypedBufferBuilder<CType>(ctx->memory_pool());
@@ -1678,7 +1659,10 @@ struct GroupedMinMaxImpl final : public GroupedAggregator {
 };
 
 struct GroupedNullMinMaxImpl final : public GroupedAggregator {
-  Status Init(ExecContext* ctx, const FunctionOptions*) override { return Status::OK(); }
+  Status Init(ExecContext* ctx, const std::vector<ValueDescr>&,
+              const FunctionOptions*) override {
+    return Status::OK();
+  }
 
   Status Resize(int64_t new_num_groups) override {
     num_groups_ = new_num_groups;
@@ -1723,7 +1707,7 @@ HashAggregateKernel MakeMinOrMaxKernel(HashAggregateFunction* min_max_func) {
                     KernelContext* ctx,
                     const KernelInitArgs& args) -> Result<std::unique_ptr<KernelState>> {
     std::vector<ValueDescr> inputs = args.inputs;
-    ARROW_ASSIGN_OR_RAISE(auto kernel, min_max_func->DispatchBest(&inputs));
+    ARROW_ASSIGN_OR_RAISE(auto kernel, min_max_func->DispatchExact(args.inputs));
     KernelInitArgs new_args{kernel, inputs, args.options};
     return kernel->init(ctx, new_args);
   };
@@ -1806,7 +1790,8 @@ struct GroupedMinMaxFactory {
 
 template <typename Impl>
 struct GroupedBooleanAggregator : public GroupedAggregator {
-  Status Init(ExecContext* ctx, const FunctionOptions* options) override {
+  Status Init(ExecContext* ctx, const std::vector<ValueDescr>&,
+              const FunctionOptions* options) override {
     options_ = checked_cast<const ScalarAggregateOptions&>(*options);
     pool_ = ctx->memory_pool();
     reduced_ = TypedBufferBuilder<bool>(pool_);
@@ -1976,7 +1961,8 @@ struct GroupedAllImpl : public GroupedBooleanAggregator<GroupedAllImpl> {
 // CountDistinct/Distinct implementation
 
 struct GroupedCountDistinctImpl : public GroupedAggregator {
-  Status Init(ExecContext* ctx, const FunctionOptions* options) override {
+  Status Init(ExecContext* ctx, const std::vector<ValueDescr>&,
+              const FunctionOptions* options) override {
     ctx_ = ctx;
     pool_ = ctx->memory_pool();
     options_ = checked_cast<const CountOptions&>(*options);
@@ -2554,6 +2540,8 @@ void RegisterHashAggregateBasic(FunctionRegistry* registry) {
                                 GroupedVarStdFactory<VarOrStd::Std>::Make, func.get()));
     DCHECK_OK(AddHashAggKernels(FloatingPointTypes(),
                                 GroupedVarStdFactory<VarOrStd::Std>::Make, func.get()));
+    DCHECK_OK(AddHashAggKernels({decimal128(1, 1), decimal256(1, 1)},
+                                GroupedVarStdFactory<VarOrStd::Std>::Make, func.get()));
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
 
@@ -2566,6 +2554,8 @@ void RegisterHashAggregateBasic(FunctionRegistry* registry) {
                                 GroupedVarStdFactory<VarOrStd::Var>::Make, func.get()));
     DCHECK_OK(AddHashAggKernels(FloatingPointTypes(),
                                 GroupedVarStdFactory<VarOrStd::Var>::Make, func.get()));
+    DCHECK_OK(AddHashAggKernels({decimal128(1, 1), decimal256(1, 1)},
+                                GroupedVarStdFactory<VarOrStd::Var>::Make, func.get()));
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
 
@@ -2579,6 +2569,9 @@ void RegisterHashAggregateBasic(FunctionRegistry* registry) {
         AddHashAggKernels(UnsignedIntTypes(), GroupedTDigestFactory::Make, func.get()));
     DCHECK_OK(
         AddHashAggKernels(FloatingPointTypes(), GroupedTDigestFactory::Make, func.get()));
+    // Precision and scale are placeholders; kernels are matched on the type id only
+    DCHECK_OK(AddHashAggKernels({decimal128(1, 1), decimal256(1, 1)},
+                                GroupedTDigestFactory::Make, func.get()));
     tdigest_func = func.get();
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index 412290aa777fc..e53c5d43ca88d 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -1130,6 +1130,55 @@ TEST(GroupBy, VarianceAndStddev) {
                           /*verbose=*/true);
 }
 
+TEST(GroupBy, VarianceAndStddevDecimal) {
+  // Decimals are converted to double using the column's scale, so the decimal128 and
+  // decimal256 columns (same values) must yield identical float64 results per group
+  auto batch = RecordBatchFromJSON(
+      schema({field("argument0", decimal128(3, 2)), field("argument1", decimal256(3, 2)),
+              field("key", int64())}),
+      R"([
+    ["1.00",  "1.00",  1],
+    [null,    null,    1],
+    ["0.00",  "0.00",  2],
+    ["4.00",  "4.00",  null],
+    ["3.00",  "3.00",  1],
+    ["0.00",  "0.00",  2],
+    ["-1.00", "-1.00", 2],
+    ["1.00",  "1.00",  null]
+  ])");
+
+  ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                       internal::GroupBy(
+                           {
+                               batch->GetColumnByName("argument0"),
+                               batch->GetColumnByName("argument0"),
+                               batch->GetColumnByName("argument1"),
+                               batch->GetColumnByName("argument1"),
+                           },
+                           {batch->GetColumnByName("key")},
+                           {
+                               {"hash_variance", nullptr},
+                               {"hash_stddev", nullptr},
+                               {"hash_variance", nullptr},
+                               {"hash_stddev", nullptr},
+                           }));
+
+  AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                            field("hash_variance", float64()),
+                                            field("hash_stddev", float64()),
+                                            field("hash_variance", float64()),
+                                            field("hash_stddev", float64()),
+                                            field("key_0", int64()),
+                                        }),
+                                        R"([
+    [1.0,                 1.0,                1.0,                 1.0,                1],
+    [0.22222222222222224, 0.4714045207910317, 0.22222222222222224, 0.4714045207910317, 2],
+    [2.25,                1.5,                2.25,                1.5,                null]
+  ])"),
+                          aggregated_and_grouped,
+                          /*verbose=*/true);
+}
+
 TEST(GroupBy, TDigest) {
   auto batch = RecordBatchFromJSON(
       schema({field("argument", float64()), field("key", int64())}), R"([
@@ -1201,6 +1250,48 @@ TEST(GroupBy, TDigest) {
       /*verbose=*/true);
 }
 
+TEST(GroupBy, TDigestDecimal) {  // hash_tdigest over decimal128 and decimal256 inputs
+  auto batch = RecordBatchFromJSON(
+      schema({field("argument0", decimal128(3, 2)), field("argument1", decimal256(3, 2)),
+              field("key", int64())}),
+      R"([
+    ["1.01",  "1.01",  1],
+    [null,    null,    1],
+    ["0.00",  "0.00",  2],
+    ["4.42",  "4.42",  null],
+    ["3.86",  "3.86",  1],
+    ["0.00",  "0.00",  2],
+    ["-1.93", "-1.93", 2],
+    ["1.85",  "1.85",  null]
+  ])");
+  // Aggregate each decimal column with hash_tdigest, passing no options (nullptr).
+  ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                       internal::GroupBy(
+                           {
+                               batch->GetColumnByName("argument0"),
+                               batch->GetColumnByName("argument1"),
+                           },
+                           {batch->GetColumnByName("key")},
+                           {
+                               {"hash_tdigest", nullptr},
+                               {"hash_tdigest", nullptr},
+                           }));
+  // Each aggregate yields a length-1 list of float64 per group; compare approximately.
+  AssertDatumsApproxEqual(
+      ArrayFromJSON(struct_({
+                        field("hash_tdigest", fixed_size_list(float64(), 1)),
+                        field("hash_tdigest", fixed_size_list(float64(), 1)),
+                        field("key_0", int64()),
+                    }),
+                    R"([
+    [[1.01], [1.01], 1],
+    [[0.0],  [0.0],  2],
+    [[1.85], [1.85], null]
+  ])"),
+      aggregated_and_grouped,
+      /*verbose=*/true);
+}
+
 TEST(GroupBy, ApproximateMedian) {
   for (const auto& type : {float64(), int8()}) {
     auto batch =
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 1a60ed31e0a47..db122ca81f3eb 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -71,6 +71,12 @@ struct AbsoluteValue {
                                                                Status* st) {
     return (arg < 0) ? arrow::internal::SafeSignedNegate(arg) : arg;
   }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_decimal_value<Arg, T> Call(KernelContext*, Arg arg,
+                                                        Status*) {  // Status never written
+    return arg.Abs();  // decimal overload: the value computes its own absolute value
+  }
 };
 
 struct AbsoluteValueChecked {
@@ -98,6 +104,12 @@ struct AbsoluteValueChecked {
     static_assert(std::is_same<T, Arg>::value, "");
     return std::fabs(arg);
   }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_decimal_value<Arg, T> Call(KernelContext*, Arg arg,
+                                                        Status*) {  // Status never written
+    return arg.Abs();  // same as AbsoluteValue: no additional check for decimals
+  }
 };
 
 struct Add {
@@ -363,6 +375,12 @@ struct Negate {
                                                           Status*) {
     return arrow::internal::SafeSignedNegate(arg);
   }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_decimal_value<Arg, T> Call(KernelContext*, Arg arg,
+                                                        Status*) {  // Status never written
+    return arg.Negate();  // decimal overload: negation is done by the value itself
+  }
 };
 
 struct NegateChecked {
@@ -392,6 +410,12 @@ struct NegateChecked {
     static_assert(std::is_same<T, Arg>::value, "");
     return -arg;
   }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_decimal_value<Arg, T> Call(KernelContext*, Arg arg,
+                                                        Status*) {  // Status never written
+    return arg.Negate();  // same as Negate: no additional check for decimals
+  }
 };
 
 struct Power {
@@ -475,6 +499,12 @@ struct Sign {
                                                                Status*) {
     return (arg > 0) ? 1 : ((arg == 0) ? 0 : -1);
   }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_decimal_value<Arg, T> Call(KernelContext*, Arg arg,
+                                                        Status*) {  // Status never written
+    return (arg == 0) ? 0 : arg.Sign();  // presumably Sign() is never 0 - confirm
+  }
 };
 
 // Bitwise operations
@@ -1583,8 +1613,18 @@ Result<ValueDescr> ResolveDecimalDivisionOutput(KernelContext*,
 }
 
 template <typename Op>
-void AddDecimalBinaryKernels(const std::string& name,
-                             std::shared_ptr<ScalarFunction>* func) {
+void AddDecimalUnaryKernels(ScalarFunction* func) {  // one kernel per decimal width
+  OutputType out_type(FirstType);  // FirstType is defined outside this hunk
+  auto in_type128 = InputType(Type::DECIMAL128);  // inputs matched by type id
+  auto in_type256 = InputType(Type::DECIMAL256);
+  auto exec128 = ScalarUnaryNotNull<Decimal128Type, Decimal128Type, Op>::Exec;
+  auto exec256 = ScalarUnaryNotNull<Decimal256Type, Decimal256Type, Op>::Exec;
+  DCHECK_OK(func->AddKernel({in_type128}, out_type, exec128));  // both share out_type
+  DCHECK_OK(func->AddKernel({in_type256}, out_type, exec256));
+}
+
+template <typename Op>
+void AddDecimalBinaryKernels(const std::string& name, ScalarFunction* func) {
   OutputType out_type(null());
   const std::string op = name.substr(0, name.find("_"));
   if (op == "add" || op == "subtract") {
@@ -1601,8 +1641,8 @@ void AddDecimalBinaryKernels(const std::string& name,
   auto in_type256 = InputType(Type::DECIMAL256);
   auto exec128 = ScalarBinaryNotNullEqualTypes<Decimal128Type, Decimal128Type, Op>::Exec;
   auto exec256 = ScalarBinaryNotNullEqualTypes<Decimal256Type, Decimal256Type, Op>::Exec;
-  DCHECK_OK((*func)->AddKernel({in_type128, in_type128}, out_type, exec128));
-  DCHECK_OK((*func)->AddKernel({in_type256, in_type256}, out_type, exec256));
+  DCHECK_OK(func->AddKernel({in_type128, in_type128}, out_type, exec128));
+  DCHECK_OK(func->AddKernel({in_type256, in_type256}, out_type, exec256));
 }
 
 // Generate a kernel given an arithmetic functor
@@ -1683,6 +1723,36 @@ struct ArithmeticFunction : ScalarFunction {
   }
 };
 
+/// An ArithmeticFunction that promotes only decimal arguments to double.
+struct ArithmeticDecimalToFloatingPointFunction : public ArithmeticFunction {
+  using ArithmeticFunction::ArithmeticFunction;
+  /// Pick a kernel: exact match first, then retry with decimals rewritten to float64.
+  Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+    RETURN_NOT_OK(CheckArity(*values));
+    // Fast path: a kernel is registered for exactly these argument types.
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    // No exact match: normalize the argument types in place before trying again.
+    EnsureDictionaryDecoded(values);
+
+    if (values->size() == 2) {
+      ReplaceNullWithOtherType(values);
+    }
+    // Only decimal arguments are forced to float64; the rest is left to CommonNumeric.
+    for (auto& descr : *values) {
+      if (is_decimal(descr.type->id())) {
+        descr.type = float64();
+      }
+    }
+    if (auto type = CommonNumeric(*values)) {
+      ReplaceTypes(type, values);
+    }
+    // Retry with the rewritten types; otherwise report that no kernel matches.
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
 /// An ArithmeticFunction that promotes only integer arguments to double.
 struct ArithmeticIntegerToFloatingPointFunction : public ArithmeticFunction {
   using ArithmeticFunction::ArithmeticFunction;
@@ -1714,13 +1784,12 @@ struct ArithmeticIntegerToFloatingPointFunction : public ArithmeticFunction {
   }
 };
 
-/// An ArithmeticFunction that promotes integer arguments to double.
+/// An ArithmeticFunction that promotes integer and decimal arguments to double.
 struct ArithmeticFloatingPointFunction : public ArithmeticFunction {
   using ArithmeticFunction::ArithmeticFunction;
 
   Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
     RETURN_NOT_OK(CheckArity(*values));
-    RETURN_NOT_OK(CheckDecimals(values));
 
     using arrow::compute::detail::DispatchExactImpl;
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
@@ -1732,7 +1801,7 @@ struct ArithmeticFloatingPointFunction : public ArithmeticFunction {
     }
 
     for (auto& descr : *values) {
-      if (is_integer(descr.type->id())) {
+      if (is_integer(descr.type->id()) || is_decimal(descr.type->id())) {
         descr.type = float64();
       }
     }
@@ -1755,10 +1824,10 @@ void AddNullExec(ScalarFunction* func) {
   DCHECK_OK(func->AddKernel(std::move(input_types), OutputType(null()), NullToNullExec));
 }
 
-template <typename Op>
+template <typename Op, typename FunctionImpl = ArithmeticFunction>
 std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
                                                        const FunctionDoc* doc) {
-  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
+  auto func = std::make_shared<FunctionImpl>(name, Arity::Binary(), doc);
   for (const auto& ty : NumericTypes()) {
     auto exec = ArithmeticExecFromOp<ScalarBinaryEqualTypes, Op>(ty);
     DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
@@ -1769,10 +1838,10 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
 
 // Like MakeArithmeticFunction, but for arithmetic ops that need to run
 // only on non-null output.
-template <typename Op>
+template <typename Op, typename FunctionImpl = ArithmeticFunction>
 std::shared_ptr<ScalarFunction> MakeArithmeticFunctionNotNull(std::string name,
                                                               const FunctionDoc* doc) {
-  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
+  auto func = std::make_shared<FunctionImpl>(name, Arity::Binary(), doc);
   for (const auto& ty : NumericTypes()) {
     auto exec = ArithmeticExecFromOp<ScalarBinaryNotNullEqualTypes, Op>(ty);
     DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
@@ -1805,6 +1874,12 @@ std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionWithFixedIntOutType(
     auto exec = GenerateArithmeticWithFixedIntOutType<ScalarUnary, IntOutType, Op>(ty);
     DCHECK_OK(func->AddKernel({ty}, out_ty, exec));
   }
+  {  // decimal inputs always produce int64, independent of IntOutType
+    auto exec = ScalarUnary<Int64Type, Decimal128Type, Op>::Exec;
+    DCHECK_OK(func->AddKernel({InputType(Type::DECIMAL128)}, int64(), exec));
+    exec = ScalarUnary<Int64Type, Decimal256Type, Op>::Exec;  // reuse the same variable
+    DCHECK_OK(func->AddKernel({InputType(Type::DECIMAL256)}, int64(), exec));
+  }
   AddNullExec(func.get());
   return func;
 }
@@ -2338,27 +2413,29 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
   // ----------------------------------------------------------------------
   auto absolute_value =
       MakeUnaryArithmeticFunction<AbsoluteValue>("abs", &absolute_value_doc);
+  AddDecimalUnaryKernels<AbsoluteValue>(absolute_value.get());
   DCHECK_OK(registry->AddFunction(std::move(absolute_value)));
 
   // ----------------------------------------------------------------------
   auto absolute_value_checked = MakeUnaryArithmeticFunctionNotNull<AbsoluteValueChecked>(
       "abs_checked", &absolute_value_checked_doc);
+  AddDecimalUnaryKernels<AbsoluteValueChecked>(absolute_value_checked.get());
   DCHECK_OK(registry->AddFunction(std::move(absolute_value_checked)));
 
   // ----------------------------------------------------------------------
   auto add = MakeArithmeticFunction<Add>("add", &add_doc);
-  AddDecimalBinaryKernels<Add>("add", &add);
+  AddDecimalBinaryKernels<Add>("add", add.get());
   DCHECK_OK(registry->AddFunction(std::move(add)));
 
   // ----------------------------------------------------------------------
   auto add_checked =
       MakeArithmeticFunctionNotNull<AddChecked>("add_checked", &add_checked_doc);
-  AddDecimalBinaryKernels<AddChecked>("add_checked", &add_checked);
+  AddDecimalBinaryKernels<AddChecked>("add_checked", add_checked.get());
   DCHECK_OK(registry->AddFunction(std::move(add_checked)));
 
   // ----------------------------------------------------------------------
   auto subtract = MakeArithmeticFunction<Subtract>("subtract", &sub_doc);
-  AddDecimalBinaryKernels<Subtract>("subtract", &subtract);
+  AddDecimalBinaryKernels<Subtract>("subtract", subtract.get());
 
   // Add subtract(timestamp, timestamp) -> duration
   for (auto unit : TimeUnit::values()) {
@@ -2372,47 +2449,52 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
   // ----------------------------------------------------------------------
   auto subtract_checked = MakeArithmeticFunctionNotNull<SubtractChecked>(
       "subtract_checked", &sub_checked_doc);
-  AddDecimalBinaryKernels<SubtractChecked>("subtract_checked", &subtract_checked);
+  AddDecimalBinaryKernels<SubtractChecked>("subtract_checked", subtract_checked.get());
   DCHECK_OK(registry->AddFunction(std::move(subtract_checked)));
 
   // ----------------------------------------------------------------------
   auto multiply = MakeArithmeticFunction<Multiply>("multiply", &mul_doc);
-  AddDecimalBinaryKernels<Multiply>("multiply", &multiply);
+  AddDecimalBinaryKernels<Multiply>("multiply", multiply.get());
   DCHECK_OK(registry->AddFunction(std::move(multiply)));
 
   // ----------------------------------------------------------------------
   auto multiply_checked = MakeArithmeticFunctionNotNull<MultiplyChecked>(
       "multiply_checked", &mul_checked_doc);
-  AddDecimalBinaryKernels<MultiplyChecked>("multiply_checked", &multiply_checked);
+  AddDecimalBinaryKernels<MultiplyChecked>("multiply_checked", multiply_checked.get());
   DCHECK_OK(registry->AddFunction(std::move(multiply_checked)));
 
   // ----------------------------------------------------------------------
   auto divide = MakeArithmeticFunctionNotNull<Divide>("divide", &div_doc);
-  AddDecimalBinaryKernels<Divide>("divide", &divide);
+  AddDecimalBinaryKernels<Divide>("divide", divide.get());
   DCHECK_OK(registry->AddFunction(std::move(divide)));
 
   // ----------------------------------------------------------------------
   auto divide_checked =
       MakeArithmeticFunctionNotNull<DivideChecked>("divide_checked", &div_checked_doc);
-  AddDecimalBinaryKernels<DivideChecked>("divide_checked", &divide_checked);
+  AddDecimalBinaryKernels<DivideChecked>("divide_checked", divide_checked.get());
   DCHECK_OK(registry->AddFunction(std::move(divide_checked)));
 
   // ----------------------------------------------------------------------
   auto negate = MakeUnaryArithmeticFunction<Negate>("negate", &negate_doc);
+  AddDecimalUnaryKernels<Negate>(negate.get());
   DCHECK_OK(registry->AddFunction(std::move(negate)));
 
   // ----------------------------------------------------------------------
   auto negate_checked = MakeUnarySignedArithmeticFunctionNotNull<NegateChecked>(
       "negate_checked", &negate_checked_doc);
+  AddDecimalUnaryKernels<NegateChecked>(negate_checked.get());
   DCHECK_OK(registry->AddFunction(std::move(negate_checked)));
 
   // ----------------------------------------------------------------------
-  auto power = MakeArithmeticFunction<Power>("power", &pow_doc);
+  auto power = MakeArithmeticFunction<Power, ArithmeticDecimalToFloatingPointFunction>(
+      "power", &pow_doc);
   DCHECK_OK(registry->AddFunction(std::move(power)));
 
   // ----------------------------------------------------------------------
   auto power_checked =
-      MakeArithmeticFunctionNotNull<PowerChecked>("power_checked", &pow_checked_doc);
+      MakeArithmeticFunctionNotNull<PowerChecked,
+                                    ArithmeticDecimalToFloatingPointFunction>(
+          "power_checked", &pow_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(power_checked)));
 
   // ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index 09681b2763bee..52414042e926f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -294,7 +294,7 @@ class TestArithmeticDecimal : public ::testing::Test {
   }
 
   void CheckRaises(const std::string& func, const DatumVector& args,
-                   const std::string& substr, FunctionOptions* options = nullptr) {
+                   const std::string& substr, const FunctionOptions* options = nullptr) {
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr(substr),
                                     CallFunction(func, args, options));
   }
@@ -1487,6 +1487,83 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
 
 class TestUnaryArithmeticDecimal : public TestArithmeticDecimal {};
 
+TEST_F(TestUnaryArithmeticDecimal, AbsoluteValue) {  // abs/abs_checked on decimals
+  auto max128 = Decimal128::GetMaxValue(38);  // bounds for the scalar edge cases below
+  auto max256 = Decimal256::GetMaxValue(76);
+  for (const auto& func : {"abs", "abs_checked"}) {  // same expectations for both
+    for (const auto& ty : PositiveScaleTypes()) {  // empty, then mixed signs and null
+      CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(ty, R"([])"));
+      CheckScalar(func, {ArrayFromJSON(ty, R"(["1.00", "-42.15", null])")},
+                  ArrayFromJSON(ty, R"(["1.00", "42.15", null])"));
+    }
+    CheckScalar(func, {std::make_shared<Decimal128Scalar>(-max128, decimal128(38, 0))},
+                std::make_shared<Decimal128Scalar>(max128, decimal128(38, 0)));
+    CheckScalar(func, {std::make_shared<Decimal256Scalar>(-max256, decimal256(76, 0))},
+                std::make_shared<Decimal256Scalar>(max256, decimal256(76, 0)));
+    for (const auto& ty : NegativeScaleTypes()) {  // same shape, negative scales
+      CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(ty, R"([])"));
+      CheckScalar(func, {DecimalArrayFromJSON(ty, R"(["12E2", "-42E2", null])")},
+                  DecimalArrayFromJSON(ty, R"(["12E2", "42E2", null])"));
+    }
+  }
+}
+
+TEST_F(TestUnaryArithmeticDecimal, Log) {  // decimal inputs to the log kernels
+  std::vector<std::string> unchecked = {"ln", "log2", "log10", "log1p"};
+  std::vector<std::string> checked = {"ln_checked", "log2_checked", "log10_checked",
+                                      "log1p_checked"};
+  std::vector<std::string> all = unchecked;
+  all.insert(all.end(), checked.begin(), checked.end());
+  // Positive inputs: every variant goes through CheckDecimalToFloat.
+  for (const auto& func : all) {
+    for (const auto& ty : PositiveScaleTypes()) {
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"(["0.01", "1.00", "4.42", null])")});
+    }
+    for (const auto& ty : NegativeScaleTypes()) {
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func, {DecimalArrayFromJSON(ty, R"(["12E2", "42E2", null])")});
+    }
+  }
+  // Negative input: unchecked variants still run, checked variants must raise.
+  for (const auto& func : unchecked) {
+    for (const auto& ty : PositiveScaleTypes()) {
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"(["-2.00"])")});
+    }
+  }
+  for (const auto& func : checked) {
+    for (const auto& ty : PositiveScaleTypes()) {
+      CheckRaises(func, {DecimalArrayFromJSON(ty, R"(["-2.00"])")},
+                  "logarithm of negative number");
+    }
+  }
+}
+
+TEST_F(TestUnaryArithmeticDecimal, Negate) {  // negate/negate_checked on decimals
+  auto max128 = Decimal128::GetMaxValue(38);  // bounds for the scalar edge cases below
+  auto max256 = Decimal256::GetMaxValue(76);
+  for (const auto& func : {"negate", "negate_checked"}) {  // same expectations for both
+    for (const auto& ty : PositiveScaleTypes()) {  // empty, then zero, both signs, null
+      CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(ty, R"([])"));
+      CheckScalar(func, {ArrayFromJSON(ty, R"(["0.00", "1.00", "-42.15", null])")},
+                  ArrayFromJSON(ty, R"(["0.00", "-1.00", "42.15", null])"));
+    }
+    CheckScalar(func, {std::make_shared<Decimal128Scalar>(-max128, decimal128(38, 0))},
+                std::make_shared<Decimal128Scalar>(max128, decimal128(38, 0)));
+    CheckScalar(func, {std::make_shared<Decimal128Scalar>(max128, decimal128(38, 0))},
+                std::make_shared<Decimal128Scalar>(-max128, decimal128(38, 0)));
+    CheckScalar(func, {std::make_shared<Decimal256Scalar>(-max256, decimal256(76, 0))},
+                std::make_shared<Decimal256Scalar>(max256, decimal256(76, 0)));
+    CheckScalar(func, {std::make_shared<Decimal256Scalar>(max256, decimal256(76, 0))},
+                std::make_shared<Decimal256Scalar>(-max256, decimal256(76, 0)));
+    for (const auto& ty : NegativeScaleTypes()) {  // same shape, negative scales
+      CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(ty, R"([])"));
+      CheckScalar(func, {DecimalArrayFromJSON(ty, R"(["0", "12E2", "-42E2", null])")},
+                  DecimalArrayFromJSON(ty, R"(["0", "-12E2", "42E2", null])"));
+    }
+  }
+}
+
 // Check two modes exhaustively, give all modes a simple test
 TEST_F(TestUnaryArithmeticDecimal, Round) {
   const auto func = "round";
@@ -1973,6 +2050,116 @@ TEST_F(TestUnaryArithmeticDecimal, RoundToMultipleHalfToOdd) {
   }
 }
 
+TEST_F(TestUnaryArithmeticDecimal, Sign) {  // sign of decimal arrays and scalars
+  auto max128 = Decimal128::GetMaxValue(38);  // bounds for the scalar edge cases below
+  auto max256 = Decimal256::GetMaxValue(76);
+  const auto func = "sign";  // expected outputs below are int64 for every input type
+  for (const auto& ty : PositiveScaleTypes()) {  // empty, then +, zero, -, null
+    CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(int64(), "[]"));
+    CheckScalar(func, {ArrayFromJSON(ty, R"(["1.00", "0.00", "-42.15", null])")},
+                ArrayFromJSON(int64(), "[1, 0, -1, null]"));
+  }
+  CheckScalar(func, {std::make_shared<Decimal128Scalar>(max128, decimal128(38, 0))},
+              ScalarFromJSON(int64(), "1"));
+  CheckScalar(func, {std::make_shared<Decimal128Scalar>(-max128, decimal128(38, 0))},
+              ScalarFromJSON(int64(), "-1"));
+  CheckScalar(func, {std::make_shared<Decimal256Scalar>(max256, decimal256(76, 0))},
+              ScalarFromJSON(int64(), "1"));
+  CheckScalar(func, {std::make_shared<Decimal256Scalar>(-max256, decimal256(76, 0))},
+              ScalarFromJSON(int64(), "-1"));
+  for (const auto& ty : NegativeScaleTypes()) {  // same shape, negative scales
+    CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(int64(), "[]"));
+    CheckScalar(func, {DecimalArrayFromJSON(ty, R"(["12E2", "0", "-42E2", null])")},
+                ArrayFromJSON(int64(), "[1, 0, -1, null]"));
+  }
+}
+
+TEST_F(TestUnaryArithmeticDecimal, TrigAcos) {  // acos/acos_checked on decimals
+  for (const auto& func : {"acos", "acos_checked"}) {  // inputs within [-1, 1]
+    for (const auto& ty : PositiveScaleTypes()) {
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func,
+                          {ArrayFromJSON(ty, R"(["0.00", "-1.00", "1.00", null])")});
+    }
+  }
+  for (const auto& ty : NegativeScaleTypes()) {  // 1200 and -4200: outside [-1, 1]
+    CheckDecimalToFloat("acos", {DecimalArrayFromJSON(ty, R"(["12E2", "-42E2", null])")});
+    CheckRaises("acos_checked", {DecimalArrayFromJSON(ty, R"(["12E2", "-42E2", null])")},
+                "domain error");
+  }
+}
+
+TEST_F(TestUnaryArithmeticDecimal, TrigAsin) {
+  for (const auto& func : {"asin", "asin_checked"}) {
+    for (const auto& ty : PositiveScaleTypes()) {
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func,
+                          {ArrayFromJSON(ty, R"(["0.00", "-1.00", "1.00", null])")});
+    }
+  }
+  // Negative-scale inputs (1200, -4200) lie outside asin's [-1, 1] domain: the
+  // unchecked kernel goes through CheckDecimalToFloat, the checked one must raise.
+  for (const auto& ty : NegativeScaleTypes()) {
+    CheckDecimalToFloat("asin", {DecimalArrayFromJSON(ty, R"(["12E2", "-42E2", null])")});
+    CheckRaises("asin_checked", {DecimalArrayFromJSON(ty, R"(["12E2", "-42E2", null])")},
+                "domain error");
+  }
+}
+
+TEST_F(TestUnaryArithmeticDecimal, TrigAtan) {  // atan on decimal inputs
+  const auto func = "atan";  // only this one name is exercised in this test
+  for (const auto& ty : PositiveScaleTypes()) {  // empty, then 0, -1, 1, null
+    CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+    CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"(["0.00", "-1.00", "1.00", null])")});
+  }
+  for (const auto& ty : NegativeScaleTypes()) {  // empty, then 1200, -4200, null
+    CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+    CheckDecimalToFloat(func, {DecimalArrayFromJSON(ty, R"(["12E2", "-42E2", null])")});
+  }
+}
+
+TEST_F(TestUnaryArithmeticDecimal, TrigCos) {  // cos/cos_checked on decimals
+  for (const auto& func : {"cos", "cos_checked"}) {  // identical inputs for both
+    for (const auto& ty : PositiveScaleTypes()) {  // empty, then 0, -1, 1, null
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func,
+                          {ArrayFromJSON(ty, R"(["0.00", "-1.00", "1.00", null])")});
+    }
+    for (const auto& ty : NegativeScaleTypes()) {  // empty, then 1200, -4200, null
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func, {DecimalArrayFromJSON(ty, R"(["12E2", "-42E2", null])")});
+    }
+  }
+}
+
+TEST_F(TestUnaryArithmeticDecimal, TrigSin) {  // sin/sin_checked on decimals
+  for (const auto& func : {"sin", "sin_checked"}) {  // identical inputs for both
+    for (const auto& ty : PositiveScaleTypes()) {  // empty, then 0, -1, 1, null
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func,
+                          {ArrayFromJSON(ty, R"(["0.00", "-1.00", "1.00", null])")});
+    }
+    for (const auto& ty : NegativeScaleTypes()) {  // empty, then 1200, -4200, null
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func, {DecimalArrayFromJSON(ty, R"(["12E2", "-42E2", null])")});
+    }
+  }
+}
+
+TEST_F(TestUnaryArithmeticDecimal, TrigTan) {  // tan/tan_checked on decimals
+  for (const auto& func : {"tan", "tan_checked"}) {  // identical inputs for both
+    for (const auto& ty : PositiveScaleTypes()) {  // empty, then 0, -1, 1, null
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func,
+                          {ArrayFromJSON(ty, R"(["0.00", "-1.00", "1.00", null])")});
+    }
+    for (const auto& ty : NegativeScaleTypes()) {  // empty, then 1200, -4200, null
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func, {DecimalArrayFromJSON(ty, R"(["12E2", "-42E2", null])")});
+    }
+  }
+}
+
 TYPED_TEST_SUITE(TestUnaryRoundIntegral, IntegralTypes);
 TYPED_TEST_SUITE(TestUnaryRoundSigned, SignedIntegerTypes);
 TYPED_TEST_SUITE(TestUnaryRoundUnsigned, UnsignedIntegerTypes);
@@ -2179,7 +2366,9 @@ TYPED_TEST(TestUnaryRoundToMultipleFloating, RoundToMultiple) {
   this->AssertUnaryOpRaises(RoundToMultiple, values, "multiple must be positive");
 }
 
-TEST(TestBinaryDecimalArithmetic, DispatchBest) {
+class TestBinaryArithmeticDecimal : public TestArithmeticDecimal {};
+
+TEST_F(TestBinaryArithmeticDecimal, DispatchBest) {
   // decimal, floating point
   for (std::string name : {"add", "subtract", "multiply", "divide"}) {
     for (std::string suffix : {"", "_checked"}) {
@@ -2269,10 +2458,18 @@ TEST(TestBinaryDecimalArithmetic, DispatchBest) {
                         {decimal128(5, 4), decimal128(2, 0)});
     }
   }
+  for (std::string name : {"atan2", "logb", "logb_checked", "power", "power_checked"}) {
+    CheckDispatchBest(name, {decimal128(2, 1), decimal128(2, 1)}, {float64(), float64()});
+    CheckDispatchBest(name, {decimal256(2, 1), decimal256(2, 1)}, {float64(), float64()});
+    CheckDispatchBest(name, {decimal128(2, 1), int64()}, {float64(), float64()});
+    CheckDispatchBest(name, {int32(), decimal128(2, 1)}, {float64(), float64()});
+    CheckDispatchBest(name, {decimal128(2, 1), float64()}, {float64(), float64()});
+    CheckDispatchBest(name, {float32(), decimal128(2, 1)}, {float64(), float64()});
+  }
 }
 
 // reference result from bc (precsion=100, scale=40)
-TEST(TestBinaryArithmeticDecimal, AddSubtract) {
+TEST_F(TestBinaryArithmeticDecimal, AddSubtract) {
   // array array, decimal128
   {
     auto left = ArrayFromJSON(decimal128(30, 3),
@@ -2386,7 +2583,14 @@ TEST(TestBinaryArithmeticDecimal, AddSubtract) {
     CheckScalarBinary("add", right, left, added);
   }
 
-  // TODO: decimal integer
+  // decimal integer
+  {
+    auto left = ScalarFromJSON(decimal128(3, 0), R"("666")");
+    auto right = ScalarFromJSON(int64(), "888");
+    CheckScalarBinary("add", left, right, ScalarFromJSON(decimal128(20, 0), R"("1554")"));
+    CheckScalarBinary("subtract", left, right,
+                      ScalarFromJSON(decimal128(20, 0), R"("-222")"));
+  }
 
   // failed case: result maybe overflow
   {
@@ -2404,7 +2608,7 @@ TEST(TestBinaryArithmeticDecimal, AddSubtract) {
   }
 }
 
-TEST(TestBinaryArithmeticDecimal, Multiply) {
+TEST_F(TestBinaryArithmeticDecimal, Multiply) {
   // array array, decimal128
   {
     auto left = ArrayFromJSON(decimal128(20, 10),
@@ -2428,7 +2632,7 @@ TEST(TestBinaryArithmeticDecimal, Multiply) {
     CheckScalarBinary("multiply", left, right, expected);
   }
 
-  // array array, decimal26
+  // array array, decimal256
   {
     auto left = ArrayFromJSON(decimal256(30, 3),
                               R"([
@@ -2484,7 +2688,13 @@ TEST(TestBinaryArithmeticDecimal, Multiply) {
     CheckScalarBinary("multiply", right, left, expected);
   }
 
-  // TODO: decimal integer
+  // decimal integer
+  {
+    auto left = ScalarFromJSON(decimal128(3, 0), R"("666")");
+    auto right = ScalarFromJSON(int64(), "888");
+    auto expected = ScalarFromJSON(decimal128(23, 0), R"("591408")");
+    CheckScalarBinary("multiply", left, right, expected);
+  }
 
   // failed case: result maybe overflow
   {
@@ -2494,7 +2704,7 @@ TEST(TestBinaryArithmeticDecimal, Multiply) {
   }
 }
 
-TEST(TestBinaryArithmeticDecimal, Divide) {
+TEST_F(TestBinaryArithmeticDecimal, Divide) {
   // array array, decimal128
   {
     auto left = ArrayFromJSON(decimal128(13, 3), R"(["1234567890.123", "0.001"])");
@@ -2555,7 +2765,16 @@ TEST(TestBinaryArithmeticDecimal, Divide) {
     CheckScalarBinary("divide", right, left, right_div_left);
   }
 
-  // TODO: decimal integer
+  // decimal integer
+  {
+    auto left = ScalarFromJSON(decimal128(3, 0), R"("100")");
+    auto right = ScalarFromJSON(int64(), "50");
+    auto left_div_right =
+        ScalarFromJSON(decimal128(23, 20), R"("2.00000000000000000000")");
+    auto right_div_left = ScalarFromJSON(decimal128(23, 4), R"("0.5000")");
+    CheckScalarBinary("divide", left, right, left_div_right);
+    CheckScalarBinary("divide", right, left, right_div_left);
+  }
 
   // failed case: result maybe overflow
   {
@@ -2572,6 +2791,96 @@ TEST(TestBinaryArithmeticDecimal, Divide) {
   }
 }
 
+TEST_F(TestBinaryArithmeticDecimal, Atan2) {
+  // Decimal arguments are promoted to double, so this is only a sanity check.
+  const auto func = "atan2";
+  for (const auto& ty : PositiveScaleTypes()) {
+    CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])"), ArrayFromJSON(ty, R"([])")});
+    CheckDecimalToFloat(
+        func, {ArrayFromJSON(ty, R"(["1.00", "10.00", "1.00", "2.00", null])"),
+               ArrayFromJSON(ty, R"(["10.00", "10.00", "2.00", "2.00", null])")});
+    CheckDecimalToFloat(
+        func,
+        {ArrayFromJSON(ty, R"(["1.00", "10.00", "1.00", "2.00", null])"),
+         ArrayFromJSON(decimal128(4, 2), R"(["10.00", "10.00", "2.00", "2.00", null])")});
+    CheckDecimalToFloat(func,
+                        {ArrayFromJSON(ty, R"(["1.00", "10.00", "1.00", "2.00", null])"),
+                         ScalarFromJSON(int64(), "10")});
+    CheckDecimalToFloat(func,
+                        {ArrayFromJSON(ty, R"(["1.00", "10.00", "1.00", "2.00", null])"),
+                         ScalarFromJSON(float64(), "10")});
+    CheckDecimalToFloat(func, {ArrayFromJSON(float64(), "[1, 10, 1, 2, null]"),
+                               ScalarFromJSON(ty, R"("10.00")")});
+    CheckDecimalToFloat(func, {ArrayFromJSON(int64(), "[1, 10, 1, 2, null]"),
+                               ScalarFromJSON(ty, R"("10.00")")});
+  }
+  for (const auto& ty : NegativeScaleTypes()) {
+    CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])"), ArrayFromJSON(ty, R"([])")});
+    CheckDecimalToFloat(func, {DecimalArrayFromJSON(ty, R"(["12E2", "42E2", null])"),
+                               DecimalArrayFromJSON(ty, R"(["12E2", "42E2", null])")});
+    CheckDecimalToFloat(
+        func, {DecimalArrayFromJSON(ty, R"(["12E2", "42E2", null])"),
+               DecimalArrayFromJSON(decimal128(2, -2), R"(["12E2", "42E2", null])")});
+    CheckDecimalToFloat(func, {DecimalArrayFromJSON(ty, R"(["12E2", "42E2", null])"),
+                               ScalarFromJSON(int64(), "10")});
+  }
+}
+
+TEST_F(TestBinaryArithmeticDecimal, Logb) {
+  // Decimal arguments are promoted to double, so this is only a sanity check.
+  for (const auto& func : {"logb", "logb_checked"}) {
+    for (const auto& ty : PositiveScaleTypes()) {
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])"), ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(
+          func, {ArrayFromJSON(ty, R"(["1.00", "10.00", "1.00", "2.00", null])"),
+                 ArrayFromJSON(ty, R"(["10.00", "10.00", "2.00", "2.00", null])")});
+      CheckDecimalToFloat(
+          func, {ArrayFromJSON(ty, R"(["1.00", "10.00", "1.00", "2.00", null])"),
+                 ArrayFromJSON(decimal128(4, 2),
+                               R"(["10.00", "10.00", "2.00", "2.00", null])")});
+      CheckDecimalToFloat(
+          func, {ArrayFromJSON(ty, R"(["1.00", "10.00", "1.00", "2.00", null])"),
+                 ScalarFromJSON(int64(), "10")});
+      CheckDecimalToFloat(
+          func, {ArrayFromJSON(ty, R"(["1.00", "10.00", "1.00", "2.00", null])"),
+                 ScalarFromJSON(float64(), "10")});
+      CheckDecimalToFloat(func, {ArrayFromJSON(float64(), "[1, 10, 1, 2, null]"),
+                                 ScalarFromJSON(ty, R"("10.00")")});
+      CheckDecimalToFloat(func, {ArrayFromJSON(int64(), "[1, 10, 1, 2, null]"),
+                                 ScalarFromJSON(ty, R"("10.00")")});
+    }
+    for (const auto& ty : NegativeScaleTypes()) {
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])"), ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func, {DecimalArrayFromJSON(ty, R"(["12E2", "42E2", null])"),
+                                 DecimalArrayFromJSON(ty, R"(["12E2", "42E2", null])")});
+      CheckDecimalToFloat(
+          func, {DecimalArrayFromJSON(ty, R"(["12E2", "42E2", null])"),
+                 DecimalArrayFromJSON(decimal128(2, -2), R"(["12E2", "42E2", null])")});
+      CheckDecimalToFloat(func, {DecimalArrayFromJSON(ty, R"(["12E2", "42E2", null])"),
+                                 ScalarFromJSON(int64(), "10")});
+    }
+  }
+}
+
+TEST_F(TestBinaryArithmeticDecimal, Power) {
+  // Decimal arguments are promoted to double, so this is only a sanity check.
+  for (const auto& func : {"power", "power_checked"}) {
+    for (const auto& ty : PositiveScaleTypes()) {
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"([])"), ArrayFromJSON(ty, R"([])")});
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"(["1.00", "2.00", null])"),
+                                 ArrayFromJSON(ty, R"(["1.23", null, "3.45"])")});
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"(["1.00", "2.00", null])"),
+                                 ArrayFromJSON(float64(), R"([1.23, null, 3.45])")});
+      CheckDecimalToFloat(func, {ArrayFromJSON(float64(), R"([1.00, 2.00, null])"),
+                                 ArrayFromJSON(ty, R"(["1.23", null, "3.45"])")});
+      CheckDecimalToFloat(func, {ArrayFromJSON(ty, R"(["1.00", "2.00", null])"),
+                                 ArrayFromJSON(int64(), R"([1, null, 3])")});
+      CheckDecimalToFloat(func, {ArrayFromJSON(int64(), R"([1, 2, null])"),
+                                 ArrayFromJSON(ty, R"(["1.23", null, "3.45"])")});
+    }
+  }
+}
+
 TYPED_TEST(TestBinaryArithmeticIntegral, ShiftLeft) {
   for (auto check_overflow : {false, true}) {
     this->SetOverflowCheck(check_overflow);
diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc
index d23a909c6fd88..3f0a6a4178383 100644
--- a/cpp/src/arrow/compute/kernels/scalar_validity.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc
@@ -176,6 +176,18 @@ void AddFloatValidityKernel(const std::shared_ptr<DataType>& ty, ScalarFunction*
                             applicator::ScalarUnary<BooleanType, InType, Op>::Exec));
 }
 
+template <bool kConstant>
+Status ConstBoolExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  if (!batch.values[0].is_scalar()) {  // array input: set every output bit
+    ArrayData* result = out->mutable_array();
+    BitUtil::SetBitsTo(result->buffers[1]->mutable_data(), result->offset,
+                       result->length, kConstant);
+  } else {  // scalar input: set the single output value
+    checked_cast<BooleanScalar*>(out->scalar().get())->value = kConstant;
+  }
+  return Status::OK();
+}
+
 std::shared_ptr<ScalarFunction> MakeIsFiniteFunction(std::string name,
                                                      const FunctionDoc* doc) {
   auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
@@ -183,6 +195,15 @@ std::shared_ptr<ScalarFunction> MakeIsFiniteFunction(std::string name,
   AddFloatValidityKernel<FloatType, IsFiniteOperator>(float32(), func.get());
   AddFloatValidityKernel<DoubleType, IsFiniteOperator>(float64(), func.get());
 
+  for (const auto& ty : IntTypes()) {
+    DCHECK_OK(func->AddKernel({InputType(ty->id())}, boolean(), ConstBoolExec<true>));
+  }
+  DCHECK_OK(func->AddKernel({InputType(Type::NA)}, boolean(), ConstBoolExec<true>));
+  DCHECK_OK(
+      func->AddKernel({InputType(Type::DECIMAL128)}, boolean(), ConstBoolExec<true>));
+  DCHECK_OK(
+      func->AddKernel({InputType(Type::DECIMAL256)}, boolean(), ConstBoolExec<true>));
+
   return func;
 }
 
@@ -193,6 +214,15 @@ std::shared_ptr<ScalarFunction> MakeIsInfFunction(std::string name,
   AddFloatValidityKernel<FloatType, IsInfOperator>(float32(), func.get());
   AddFloatValidityKernel<DoubleType, IsInfOperator>(float64(), func.get());
 
+  for (const auto& ty : IntTypes()) {
+    DCHECK_OK(func->AddKernel({InputType(ty->id())}, boolean(), ConstBoolExec<false>));
+  }
+  DCHECK_OK(func->AddKernel({InputType(Type::NA)}, boolean(), ConstBoolExec<false>));
+  DCHECK_OK(
+      func->AddKernel({InputType(Type::DECIMAL128)}, boolean(), ConstBoolExec<false>));
+  DCHECK_OK(
+      func->AddKernel({InputType(Type::DECIMAL256)}, boolean(), ConstBoolExec<false>));
+
   return func;
 }
 
@@ -203,6 +233,15 @@ std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name,
   AddFloatValidityKernel<FloatType, IsNanOperator>(float32(), func.get());
   AddFloatValidityKernel<DoubleType, IsNanOperator>(float64(), func.get());
 
+  for (const auto& ty : IntTypes()) {
+    DCHECK_OK(func->AddKernel({InputType(ty->id())}, boolean(), ConstBoolExec<false>));
+  }
+  DCHECK_OK(func->AddKernel({InputType(Type::NA)}, boolean(), ConstBoolExec<false>));
+  DCHECK_OK(
+      func->AddKernel({InputType(Type::DECIMAL128)}, boolean(), ConstBoolExec<false>));
+  DCHECK_OK(
+      func->AddKernel({InputType(Type::DECIMAL256)}, boolean(), ConstBoolExec<false>));
+
   return func;
 }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc
index 35a6b831ef45a..5b0934828a23b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc
@@ -80,6 +80,45 @@ TEST_F(TestBooleanValidityKernels, IsNull) {
                    "[true, false, false, true]", &nan_is_null_options);
 }
 
+TEST(TestValidityKernels, IsFinite) {
+  const auto expected = ArrayFromJSON(boolean(), "[true, true, true, null]");
+  for (const auto& int_type : IntTypes()) {
+    CheckScalar("is_finite", {ArrayFromJSON(int_type, "[0, 1, 42, null]")}, expected);
+  }
+  for (const auto& dec_type : {decimal128(4, 2), decimal256(4, 2)}) {
+    const auto values = ArrayFromJSON(dec_type, R"(["0.00", "1.01", "-42.00", null])");
+    CheckScalar("is_finite", {values}, expected);
+  }
+  CheckScalar("is_finite", {std::make_shared<NullArray>(4)},
+              ArrayFromJSON(boolean(), "[null, null, null, null]"));
+}
+
+TEST(TestValidityKernels, IsInf) {
+  const auto expected = ArrayFromJSON(boolean(), "[false, false, false, null]");
+  for (const auto& int_type : IntTypes()) {
+    CheckScalar("is_inf", {ArrayFromJSON(int_type, "[0, 1, 42, null]")}, expected);
+  }
+  for (const auto& dec_type : {decimal128(4, 2), decimal256(4, 2)}) {
+    const auto values = ArrayFromJSON(dec_type, R"(["0.00", "1.01", "-42.00", null])");
+    CheckScalar("is_inf", {values}, expected);
+  }
+  CheckScalar("is_inf", {std::make_shared<NullArray>(4)},
+              ArrayFromJSON(boolean(), "[null, null, null, null]"));
+}
+
+TEST(TestValidityKernels, IsNan) {
+  const auto expected = ArrayFromJSON(boolean(), "[false, false, false, null]");
+  for (const auto& int_type : IntTypes()) {
+    CheckScalar("is_nan", {ArrayFromJSON(int_type, "[0, 1, 42, null]")}, expected);
+  }
+  for (const auto& dec_type : {decimal128(4, 2), decimal256(4, 2)}) {
+    const auto values = ArrayFromJSON(dec_type, R"(["0.00", "1.01", "-42.00", null])");
+    CheckScalar("is_nan", {values}, expected);
+  }
+  CheckScalar("is_nan", {std::make_shared<NullArray>(4)},
+              ArrayFromJSON(boolean(), "[null, null, null, null]"));
+}
+
 TEST(TestValidityKernels, IsValidIsNullNullType) {
   CheckScalarUnary("is_null", std::make_shared<NullArray>(5),
                    ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc
index 1832bf5c4f9e7..c3b9d0272e86f 100644
--- a/cpp/src/arrow/util/basic_decimal.cc
+++ b/cpp/src/arrow/util/basic_decimal.cc
@@ -1115,6 +1115,12 @@ const BasicDecimal128& BasicDecimal128::GetHalfScaleMultiplier(int32_t scale) {
 
 const BasicDecimal128& BasicDecimal128::GetMaxValue() { return kMaxValue; }
 
+BasicDecimal128 BasicDecimal128::GetMaxValue(int32_t precision) {
+  DCHECK_GE(precision, 0);
+  DCHECK_LE(precision, 38);  // precision is expected to lie in [0, 38]
+  return ScaleMultipliers[precision] - 1;  // table entry for `precision`, minus one
+}
+
 BasicDecimal128 BasicDecimal128::IncreaseScaleBy(int32_t increase_by) const {
   DCHECK_GE(increase_by, 0);
   DCHECK_LE(increase_by, 38);
@@ -1330,6 +1336,12 @@ const BasicDecimal256& BasicDecimal256::GetHalfScaleMultiplier(int32_t scale) {
   return ScaleMultipliersHalfDecimal256[scale];
 }
 
+BasicDecimal256 BasicDecimal256::GetMaxValue(int32_t precision) {
+  DCHECK_GE(precision, 0);
+  DCHECK_LE(precision, 76);  // precision is expected to lie in [0, 76]
+  return ScaleMultipliersDecimal256[precision] + (-1);  // table entry minus one
+}
+
 BasicDecimal256 operator*(const BasicDecimal256& left, const BasicDecimal256& right) {
   BasicDecimal256 result = left;
   result *= right;
diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h
index a4df3285596b5..93dced967e32d 100644
--- a/cpp/src/arrow/util/basic_decimal.h
+++ b/cpp/src/arrow/util/basic_decimal.h
@@ -227,6 +227,9 @@ class ARROW_EXPORT BasicDecimal128 {
   /// \brief Get the maximum valid unscaled decimal value.
   static const BasicDecimal128& GetMaxValue();
 
+  /// \brief Get the maximum valid unscaled decimal value for the given precision.
+  static BasicDecimal128 GetMaxValue(int32_t precision);
+
   /// \brief Get the maximum decimal value (is not a valid value).
   static inline constexpr BasicDecimal128 GetMaxSentinel() {
     return BasicDecimal128(/*high=*/std::numeric_limits<int64_t>::max(),
@@ -427,6 +430,9 @@ class ARROW_EXPORT BasicDecimal256 {
   /// \brief In-place division.
   BasicDecimal256& operator/=(const BasicDecimal256& right);
 
+  /// \brief Get the maximum valid unscaled decimal value for the given precision.
+  static BasicDecimal256 GetMaxValue(int32_t precision);
+
   /// \brief Get the maximum decimal value (is not a valid value).
   static inline constexpr BasicDecimal256 GetMaxSentinel() {
 #if ARROW_LITTLE_ENDIAN
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 34b1f3448da7c..26c44a67e5bb7 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -219,13 +219,13 @@ the input to a single output value.
 +--------------------+-------+------------------+------------------------+----------------------------------+-------+
 | quantile           | Unary | Numeric          | Scalar Numeric         | :struct:`QuantileOptions`        | \(7)  |
 +--------------------+-------+------------------+------------------------+----------------------------------+-------+
-| stddev             | Unary | Numeric          | Scalar Float64         | :struct:`VarianceOptions`        |       |
+| stddev             | Unary | Numeric          | Scalar Float64         | :struct:`VarianceOptions`        | \(8)  |
 +--------------------+-------+------------------+------------------------+----------------------------------+-------+
 | sum                | Unary | Numeric          | Scalar Numeric         | :struct:`ScalarAggregateOptions` | \(6)  |
 +--------------------+-------+------------------+------------------------+----------------------------------+-------+
-| tdigest            | Unary | Numeric          | Float64                | :struct:`TDigestOptions`         | \(8)  |
+| tdigest            | Unary | Numeric          | Float64                | :struct:`TDigestOptions`         | \(9)  |
 +--------------------+-------+------------------+------------------------+----------------------------------+-------+
-| variance           | Unary | Numeric          | Scalar Float64         | :struct:`VarianceOptions`        |       |
+| variance           | Unary | Numeric          | Scalar Float64         | :struct:`VarianceOptions`        | \(8)  |
 +--------------------+-------+------------------+------------------------+----------------------------------+-------+
 
 * \(1) If null values are taken into account, by setting the
@@ -255,10 +255,14 @@ the input to a single output value.
 
 * \(7) Output is Float64 or input type, depending on QuantileOptions.
 
-* \(8) tdigest/t-digest computes approximate quantiles, and so only needs a
+* \(8) Decimal arguments are cast to Float64 first.
+
+* \(9) tdigest/t-digest computes approximate quantiles, and so only needs a
   fixed amount of memory. See the `reference implementation
   <https://github.com/tdunning/t-digest>`_ for details.
 
+  Decimal arguments are cast to Float64 first.
+
 Grouped Aggregations ("group by")
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -330,13 +334,13 @@ equivalents above and reflects how they are implemented internally.
 +-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+
 | hash_product            | Unary | Numeric                            | Numeric                | :struct:`ScalarAggregateOptions` | \(4)  |
 +-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+
-| hash_stddev             | Unary | Numeric                            | Float64                | :struct:`VarianceOptions`        |       |
+| hash_stddev             | Unary | Numeric                            | Float64                | :struct:`VarianceOptions`        | \(5)  |
 +-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+
 | hash_sum                | Unary | Numeric                            | Numeric                | :struct:`ScalarAggregateOptions` | \(4)  |
 +-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+
-| hash_tdigest            | Unary | Numeric                            | FixedSizeList[Float64] | :struct:`TDigestOptions`         | \(5)  |
+| hash_tdigest            | Unary | Numeric                            | FixedSizeList[Float64] | :struct:`TDigestOptions`         | \(6)  |
 +-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+
-| hash_variance           | Unary | Numeric                            | Float64                | :struct:`VarianceOptions`        |       |
+| hash_variance           | Unary | Numeric                            | Float64                | :struct:`VarianceOptions`        | \(5)  |
 +-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+
 
 * \(1) If null values are taken into account, by setting the
@@ -357,10 +361,14 @@ equivalents above and reflects how they are implemented internally.
 * \(4) Output is Int64, UInt64, Float64, or Decimal128/256, depending on the
   input type.
 
-* \(5) T-digest computes approximate quantiles, and so only needs a
+* \(5) Decimal arguments are cast to Float64 first.
+
+* \(6) T-digest computes approximate quantiles, and so only needs a
   fixed amount of memory. See the `reference implementation
   <https://github.com/tdunning/t-digest>`_ for details.
 
+  Decimal arguments are cast to Float64 first.
+
 Element-wise ("scalar") functions
 ---------------------------------
 
@@ -456,8 +464,8 @@ decimal and integer arguments will cast all arguments to decimals.
   enough scale kept. Error is returned if the result precision is beyond the
   decimal value range.
 
-* \(2) Output is any of (-1,1) for nonzero inputs and 0 for zero input.
-  NaN values return NaN.  Integral values return signedness as Int8 and
+* \(2) Output is any of (-1,1) for nonzero inputs and 0 for zero input.  NaN
+  values return NaN.  Integral and decimal values return signedness as Int8 and
   floating-point values return it with the same type as the input values.
 
 Bit-wise functions
@@ -585,29 +593,31 @@ Logarithmic functions
 Logarithmic functions are also supported, and also offer ``_checked``
 variants that check for domain errors if needed.
 
-+--------------------------+------------+--------------------+---------------------+
-| Function name            | Arity      | Input types        | Output type         |
-+==========================+============+====================+=====================+
-| ln                       | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| ln_checked               | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| log10                    | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| log10_checked            | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| log1p                    | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| log1p_checked            | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| log2                     | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| log2_checked             | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| logb                     | Binary     | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| logb_checked             | Binary     | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
+Decimal values are accepted, but are cast to Float64 first.
+
++--------------------------+------------+-------------------------+---------------------+
+| Function name            | Arity      | Input types             | Output type         |
++==========================+============+=========================+=====================+
+| ln                       | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| ln_checked               | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| log10                    | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| log10_checked            | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| log1p                    | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| log1p_checked            | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| log2                     | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| log2_checked             | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| logb                     | Binary     | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| logb_checked             | Binary     | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
 
 Trigonometric functions
 ~~~~~~~~~~~~~~~~~~~~~~~
@@ -615,33 +625,35 @@ Trigonometric functions
 Trigonometric functions are also supported, and also offer ``_checked``
 variants that check for domain errors if needed.
 
-+--------------------------+------------+--------------------+---------------------+
-| Function name            | Arity      | Input types        | Output type         |
-+==========================+============+====================+=====================+
-| acos                     | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| acos_checked             | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| asin                     | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| asin_checked             | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| atan                     | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| atan2                    | Binary     | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| cos                      | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| cos_checked              | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| sin                      | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| sin_checked              | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| tan                      | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
-| tan_checked              | Unary      | Float32/Float64    | Float32/Float64     |
-+--------------------------+------------+--------------------+---------------------+
+Decimal values are accepted, but are cast to Float64 first.
+
++--------------------------+------------+-------------------------+---------------------+
+| Function name            | Arity      | Input types             | Output type         |
++==========================+============+=========================+=====================+
+| acos                     | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| acos_checked             | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| asin                     | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| asin_checked             | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| atan                     | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| atan2                    | Binary     | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| cos                      | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| cos_checked              | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| sin                      | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| sin_checked              | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| tan                      | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
+| tan_checked              | Unary      | Float32/Float64/Decimal | Float32/Float64     |
++--------------------------+------------+-------------------------+---------------------+
 
 Comparisons
 ~~~~~~~~~~~
@@ -1117,26 +1129,28 @@ Containment tests
 Categorizations
 ~~~~~~~~~~~~~~~
 
-+-------------------+------------+---------------------+---------------------+------------------------+---------+
-| Function name     | Arity      | Input types         | Output type         | Options class          | Notes   |
-+===================+============+=====================+=====================+========================+=========+
-| is_finite         | Unary      | Float, Double       | Boolean             |                        | \(1)    |
-+-------------------+------------+---------------------+---------------------+------------------------+---------+
-| is_inf            | Unary      | Float, Double       | Boolean             |                        | \(2)    |
-+-------------------+------------+---------------------+---------------------+------------------------+---------+
-| is_nan            | Unary      | Float, Double       | Boolean             |                        | \(3)    |
-+-------------------+------------+---------------------+---------------------+------------------------+---------+
-| is_null           | Unary      | Any                 | Boolean             | :struct:`NullOptions`  | \(4)    |
-+-------------------+------------+---------------------+---------------------+------------------------+---------+
-| is_valid          | Unary      | Any                 | Boolean             |                        | \(5)    |
-+-------------------+------------+---------------------+---------------------+------------------------+---------+
++-------------------+------------+-------------------------+---------------------+------------------------+---------+
+| Function name     | Arity      | Input types             | Output type         | Options class          | Notes   |
++===================+============+=========================+=====================+========================+=========+
+| is_finite         | Unary      | Null, Numeric           | Boolean             |                        | \(1)    |
++-------------------+------------+-------------------------+---------------------+------------------------+---------+
+| is_inf            | Unary      | Null, Numeric           | Boolean             |                        | \(2)    |
++-------------------+------------+-------------------------+---------------------+------------------------+---------+
+| is_nan            | Unary      | Null, Numeric           | Boolean             |                        | \(3)    |
++-------------------+------------+-------------------------+---------------------+------------------------+---------+
+| is_null           | Unary      | Any                     | Boolean             | :struct:`NullOptions`  | \(4)    |
++-------------------+------------+-------------------------+---------------------+------------------------+---------+
+| is_valid          | Unary      | Any                     | Boolean             |                        | \(5)    |
++-------------------+------------+-------------------------+---------------------+------------------------+---------+
 
 * \(1) Output is true iff the corresponding input element is finite (neither Infinity,
-  -Infinity, nor NaN).
+  -Infinity, nor NaN). Hence, for Decimal and integer inputs, the output is always true.
 
 * \(2) Output is true iff the corresponding input element is Infinity/-Infinity.
+  Hence, for Decimal and integer inputs, the output is always false.
 
 * \(3) Output is true iff the corresponding input element is NaN.
+  Hence, for Decimal and integer inputs, the output is always false.
 
 * \(4) Output is true iff the corresponding input element is null. NaN values
   can also be considered null by setting :member:`NullOptions::nan_is_null`.