diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h
index 7a33126eaeee7..9df3e3c74e3f9 100644
--- a/cpp/src/arrow/scalar.h
+++ b/cpp/src/arrow/scalar.h
@@ -380,43 +380,33 @@ struct ARROW_EXPORT DurationScalar : public TemporalScalar<DurationType> {
       : DurationScalar(std::move(value), duration(unit)) {}
 };
 
-struct ARROW_EXPORT Decimal128Scalar : public internal::PrimitiveScalarBase {
+template <typename TYPE_CLASS, typename VALUE_TYPE>
+struct ARROW_EXPORT DecimalScalar : public internal::PrimitiveScalarBase {
   using internal::PrimitiveScalarBase::PrimitiveScalarBase;
-  using TypeClass = Decimal128Type;
-  using ValueType = Decimal128;
+  using TypeClass = TYPE_CLASS;
+  using ValueType = VALUE_TYPE;
 
-  Decimal128Scalar(Decimal128 value, std::shared_ptr<DataType> type)
+  DecimalScalar(ValueType value, std::shared_ptr<DataType> type)
       : internal::PrimitiveScalarBase(std::move(type), true), value(value) {}
 
   void* mutable_data() override {
     return reinterpret_cast<void*>(value.mutable_native_endian_bytes());
   }
+
   util::string_view view() const override {
     return util::string_view(reinterpret_cast<const char*>(value.native_endian_bytes()),
-                             16);
+                             ValueType::kByteWidth);
   }
 
-  Decimal128 value;
+  ValueType value;
 };
 
-struct ARROW_EXPORT Decimal256Scalar : public internal::PrimitiveScalarBase {
-  using internal::PrimitiveScalarBase::PrimitiveScalarBase;
-  using TypeClass = Decimal256Type;
-  using ValueType = Decimal256;
-
-  Decimal256Scalar(Decimal256 value, std::shared_ptr<DataType> type)
-      : internal::PrimitiveScalarBase(std::move(type), true), value(value) {}
-
-  void* mutable_data() override {
-    return reinterpret_cast<void*>(value.mutable_native_endian_bytes());
-  }
-  util::string_view view() const override {
-    const std::array<uint64_t, 4>& bytes = value.native_endian_array();
-    return util::string_view(reinterpret_cast<const char*>(bytes.data()),
-                             bytes.size() * sizeof(uint64_t));
-  }
+struct ARROW_EXPORT Decimal128Scalar : public DecimalScalar<Decimal128Type, Decimal128> {
+  using DecimalScalar::DecimalScalar;
+};
 
-  Decimal256 value;
+struct ARROW_EXPORT Decimal256Scalar : public DecimalScalar<Decimal256Type, Decimal256> {
+  using DecimalScalar::DecimalScalar;
 };
 
 struct ARROW_EXPORT BaseListScalar : public Scalar {
diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc
index aaae6bfe7c2ee..3d6edaebfd468 100644
--- a/cpp/src/arrow/util/basic_decimal.cc
+++ b/cpp/src/arrow/util/basic_decimal.cc
@@ -37,8 +37,11 @@
 
 namespace arrow {
 
+using internal::AddWithOverflow;
 using internal::SafeLeftShift;
 using internal::SafeSignedAdd;
+using internal::SafeSignedSubtract;
+using internal::SubtractWithOverflow;
 
 static const BasicDecimal128 ScaleMultipliers[] = {
     BasicDecimal128(1LL),
@@ -368,43 +371,16 @@ static constexpr uint64_t kInt32Mask = 0xFFFFFFFF;
 static constexpr BasicDecimal128 kMaxValue =
     BasicDecimal128(5421010862427522170LL, 687399551400673280ULL - 1);
 
-#if ARROW_LITTLE_ENDIAN
-BasicDecimal128::BasicDecimal128(const uint8_t* bytes)
-    : BasicDecimal128(reinterpret_cast<const int64_t*>(bytes)[1],
-                      reinterpret_cast<const uint64_t*>(bytes)[0]) {}
-#else
-BasicDecimal128::BasicDecimal128(const uint8_t* bytes)
-    : BasicDecimal128(reinterpret_cast<const int64_t*>(bytes)[0],
-                      reinterpret_cast<const uint64_t*>(bytes)[1]) {}
-#endif
-
-constexpr int BasicDecimal128::kBitWidth;
 constexpr int BasicDecimal128::kMaxPrecision;
 constexpr int BasicDecimal128::kMaxScale;
 
-std::array<uint8_t, 16> BasicDecimal128::ToBytes() const {
-  std::array<uint8_t, 16> out{{0}};
-  ToBytes(out.data());
-  return out;
-}
-
-void BasicDecimal128::ToBytes(uint8_t* out) const {
-  DCHECK_NE(out, nullptr);
-#if ARROW_LITTLE_ENDIAN
-  reinterpret_cast<uint64_t*>(out)[0] = low_bits_;
-  reinterpret_cast<int64_t*>(out)[1] = high_bits_;
-#else
-  reinterpret_cast<int64_t*>(out)[0] = high_bits_;
-  reinterpret_cast<uint64_t*>(out)[1] = low_bits_;
-#endif
-}
-
 BasicDecimal128& BasicDecimal128::Negate() {
-  low_bits_ = ~low_bits_ + 1;
-  high_bits_ = ~high_bits_;
-  if (low_bits_ == 0) {
-    high_bits_ = SafeSignedAdd<int64_t>(high_bits_, 1);
+  uint64_t result_lo = ~low_bits() + 1;
+  int64_t result_hi = ~high_bits();
+  if (result_lo == 0) {
+    result_hi = SafeSignedAdd<int64_t>(result_hi, 1);
   }
+  *this = BasicDecimal128(result_hi, result_lo);
   return *this;
 }
 
@@ -422,22 +398,18 @@ bool BasicDecimal128::FitsInPrecision(int32_t precision) const {
 }
 
 BasicDecimal128& BasicDecimal128::operator+=(const BasicDecimal128& right) {
-  const uint64_t sum = low_bits_ + right.low_bits_;
-  high_bits_ = SafeSignedAdd<int64_t>(high_bits_, right.high_bits_);
-  if (sum < low_bits_) {
-    high_bits_ = SafeSignedAdd<int64_t>(high_bits_, 1);
-  }
-  low_bits_ = sum;
+  int64_t result_hi = SafeSignedAdd(high_bits(), right.high_bits());
+  uint64_t result_lo = low_bits() + right.low_bits();
+  result_hi = SafeSignedAdd<int64_t>(result_hi, result_lo < low_bits());
+  *this = BasicDecimal128(result_hi, result_lo);
   return *this;
 }
 
 BasicDecimal128& BasicDecimal128::operator-=(const BasicDecimal128& right) {
-  const uint64_t diff = low_bits_ - right.low_bits_;
-  high_bits_ -= right.high_bits_;
-  if (diff > low_bits_) {
-    --high_bits_;
-  }
-  low_bits_ = diff;
+  int64_t result_hi = SafeSignedSubtract(high_bits(), right.high_bits());
+  uint64_t result_lo = low_bits() - right.low_bits();
+  result_hi = SafeSignedSubtract<int64_t>(result_hi, result_lo > low_bits());
+  *this = BasicDecimal128(result_hi, result_lo);
   return *this;
 }
 
@@ -449,47 +421,53 @@ BasicDecimal128& BasicDecimal128::operator/=(const BasicDecimal128& right) {
 }
 
 BasicDecimal128& BasicDecimal128::operator|=(const BasicDecimal128& right) {
-  low_bits_ |= right.low_bits_;
-  high_bits_ |= right.high_bits_;
+  array_[0] |= right.array_[0];
+  array_[1] |= right.array_[1];
   return *this;
 }
 
 BasicDecimal128& BasicDecimal128::operator&=(const BasicDecimal128& right) {
-  low_bits_ &= right.low_bits_;
-  high_bits_ &= right.high_bits_;
+  array_[0] &= right.array_[0];
+  array_[1] &= right.array_[1];
   return *this;
 }
 
 BasicDecimal128& BasicDecimal128::operator<<=(uint32_t bits) {
   if (bits != 0) {
+    uint64_t result_lo;
+    int64_t result_hi;
     if (bits < 64) {
-      high_bits_ = SafeLeftShift(high_bits_, bits);
-      high_bits_ |= (low_bits_ >> (64 - bits));
-      low_bits_ <<= bits;
+      result_hi = SafeLeftShift(high_bits(), bits);
+      result_hi |= (low_bits() >> (64 - bits));
+      result_lo = low_bits() << bits;
     } else if (bits < 128) {
-      high_bits_ = static_cast<int64_t>(low_bits_) << (bits - 64);
-      low_bits_ = 0;
+      result_hi = static_cast<int64_t>(low_bits() << (bits - 64));
+      result_lo = 0;
     } else {
-      high_bits_ = 0;
-      low_bits_ = 0;
+      result_hi = 0;
+      result_lo = 0;
     }
+    *this = BasicDecimal128(result_hi, result_lo);
   }
   return *this;
 }
 
 BasicDecimal128& BasicDecimal128::operator>>=(uint32_t bits) {
   if (bits != 0) {
+    uint64_t result_lo;
+    int64_t result_hi;
     if (bits < 64) {
-      low_bits_ >>= bits;
-      low_bits_ |= static_cast<uint64_t>(high_bits_ << (64 - bits));
-      high_bits_ = static_cast<int64_t>(static_cast<uint64_t>(high_bits_) >> bits);
+      result_lo = low_bits() >> bits;
+      result_lo |= static_cast<uint64_t>(high_bits()) << (64 - bits);
+      result_hi = high_bits() >> bits;
     } else if (bits < 128) {
-      low_bits_ = static_cast<uint64_t>(high_bits_ >> (bits - 64));
-      high_bits_ = static_cast<int64_t>(high_bits_ >= 0L ? 0L : -1L);
+      result_lo = static_cast<uint64_t>(high_bits() >> (bits - 64));
+      result_hi = high_bits() >> 63;
     } else {
-      high_bits_ = static_cast<int64_t>(high_bits_ >= 0L ? 0L : -1L);
-      low_bits_ = static_cast<uint64_t>(high_bits_);
+      result_hi = high_bits() >> 63;
+      result_lo = static_cast<uint64_t>(result_hi);
     }
+    *this = BasicDecimal128(result_hi, result_lo);
   }
   return *this;
 }
@@ -633,8 +611,7 @@ BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
   BasicDecimal128 y = BasicDecimal128::Abs(right);
   uint128_t r(x);
   r *= uint128_t{y};
-  high_bits_ = r.hi();
-  low_bits_ = r.lo();
+  *this = BasicDecimal128(static_cast<int64_t>(r.hi()), r.lo());
   if (negate) {
     Negate();
   }
@@ -1158,20 +1135,13 @@ BasicDecimal128 BasicDecimal128::ReduceScaleBy(int32_t reduce_by, bool round) co
 int32_t BasicDecimal128::CountLeadingBinaryZeros() const {
   DCHECK_GE(*this, BasicDecimal128(0));
 
-  if (high_bits_ == 0) {
-    return bit_util::CountLeadingZeros(low_bits_) + 64;
+  if (high_bits() == 0) {
+    return bit_util::CountLeadingZeros(low_bits()) + 64;
   } else {
-    return bit_util::CountLeadingZeros(static_cast<uint64_t>(high_bits_));
+    return bit_util::CountLeadingZeros(static_cast<uint64_t>(high_bits()));
   }
 }
 
-BasicDecimal256::BasicDecimal256(const uint8_t* bytes)
-    : array_({reinterpret_cast<const uint64_t*>(bytes)[0],
-              reinterpret_cast<const uint64_t*>(bytes)[1],
-              reinterpret_cast<const uint64_t*>(bytes)[2],
-              reinterpret_cast<const uint64_t*>(bytes)[3]}) {}
-
-constexpr int BasicDecimal256::kBitWidth;
 constexpr int BasicDecimal256::kMaxPrecision;
 constexpr int BasicDecimal256::kMaxScale;
 
@@ -1243,20 +1213,6 @@ BasicDecimal256& BasicDecimal256::operator<<=(uint32_t bits) {
   return *this;
 }
 
-std::array<uint8_t, 32> BasicDecimal256::ToBytes() const {
-  std::array<uint8_t, 32> out{{0}};
-  ToBytes(out.data());
-  return out;
-}
-
-void BasicDecimal256::ToBytes(uint8_t* out) const {
-  DCHECK_NE(out, nullptr);
-  reinterpret_cast<uint64_t*>(out)[0] = array_[0];
-  reinterpret_cast<uint64_t*>(out)[1] = array_[1];
-  reinterpret_cast<uint64_t*>(out)[2] = array_[2];
-  reinterpret_cast<uint64_t*>(out)[3] = array_[3];
-}
-
 BasicDecimal256& BasicDecimal256::operator*=(const BasicDecimal256& right) {
   // Since the max value of BasicDecimal256 is supposed to be 1e76 - 1 and the
   // min the negation taking the absolute values here should always be safe.
@@ -1391,4 +1347,8 @@ BasicDecimal256 operator/(const BasicDecimal256& left, const BasicDecimal256& ri
   return result;
 }
 
+// Explicitly instantiate template base class, for DLL linking on Windows
+template class GenericBasicDecimal<BasicDecimal128, 128>;
+template class GenericBasicDecimal<BasicDecimal256, 256>;
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h
index 00514194155c3..90ffcec776c45 100644
--- a/cpp/src/arrow/util/basic_decimal.h
+++ b/cpp/src/arrow/util/basic_decimal.h
@@ -19,6 +19,7 @@
 
 #include <array>
 #include <cstdint>
+#include <cstring>
 #include <limits>
 #include <string>
 #include <type_traits>
@@ -37,50 +38,116 @@ enum class DecimalStatus {
   kRescaleDataLoss,
 };
 
+template <typename Derived, int BIT_WIDTH, int NWORDS = BIT_WIDTH / 64>
+class ARROW_EXPORT GenericBasicDecimal {
+ protected:
+  struct LittleEndianArrayTag {};
+
+#if ARROW_LITTLE_ENDIAN
+  static constexpr int kHighWordIndex = NWORDS - 1;
+#else
+  static constexpr int kHighWordIndex = 0;
+#endif
+
+ public:
+  static constexpr int kBitWidth = BIT_WIDTH;
+  static constexpr int kByteWidth = kBitWidth / 8;
+
+  // A constructor tag to introduce a little-endian encoded array
+  static constexpr LittleEndianArrayTag LittleEndianArray{};
+
+  using WordArray = std::array<uint64_t, NWORDS>;
+
+  /// \brief Empty constructor creates a decimal with a value of 0.
+  constexpr GenericBasicDecimal() noexcept : array_({0}) {}
+
+  /// \brief Create a decimal from the two's complement representation.
+  ///
+  /// Input array is assumed to be in native endianness.
+  constexpr GenericBasicDecimal(const WordArray& array) noexcept : array_(array) {}
+
+  /// \brief Create a decimal from the two's complement representation.
+  ///
+  /// Input words are in little-endian order; each word itself is native-endian.
+  GenericBasicDecimal(LittleEndianArrayTag, const WordArray& array) noexcept
+      : GenericBasicDecimal(bit_util::little_endian::ToNative(array)) {}
+
+  /// \brief Create a decimal from an array of bytes.
+  ///
+  /// Reads sizeof(WordArray) bytes, assumed to be in native-endian byte order.
+  explicit GenericBasicDecimal(const uint8_t* bytes) {
+    std::memcpy(array_.data(), bytes, sizeof(array_));
+  }
+
+  /// \brief Get the bits of the two's complement representation of the number.
+  ///
+  /// The elements are in native endian order. The bits within each uint64_t element
+  /// are in native endian order. For example, on a little endian machine,
+  /// BasicDecimal128(123).native_endian_array() = {123, 0};
+  /// but on a big endian machine,
+  /// BasicDecimal128(123).native_endian_array() = {0, 123};
+  constexpr const WordArray& native_endian_array() const { return array_; }
+
+  /// \brief Get the bits of the two's complement representation of the number.
+  ///
+  /// The elements are in little endian order. However, the bits within each
+  /// uint64_t element are in native endian order.
+  /// For example, BasicDecimal128(123).little_endian_array() = {123, 0};
+  WordArray little_endian_array() const {
+    return bit_util::little_endian::FromNative(array_);
+  }
+
+  const uint8_t* native_endian_bytes() const {
+    return reinterpret_cast<const uint8_t*>(array_.data());
+  }
+
+  uint8_t* mutable_native_endian_bytes() {
+    return reinterpret_cast<uint8_t*>(array_.data());
+  }
+
+  /// \brief Return the raw bytes of the value in native-endian byte order.
+  std::array<uint8_t, kByteWidth> ToBytes() const {
+    std::array<uint8_t, kByteWidth> out{{0}};
+    std::memcpy(out.data(), array_.data(), kByteWidth);
+    return out;
+  }
+
+  /// \brief Copy kByteWidth native-endian bytes to `out` (must be non-null).
+  void ToBytes(uint8_t* out) const { std::memcpy(out, array_.data(), kByteWidth); }
+
+  /// Return 1 if positive or zero, -1 if strictly negative.
+  int64_t Sign() const {
+    return 1 | (static_cast<int64_t>(array_[kHighWordIndex]) >> 63);
+  }
+
+  bool IsNegative() const { return static_cast<int64_t>(array_[kHighWordIndex]) < 0; }
+
+ protected:
+  WordArray array_;
+};
+
 /// Represents a signed 128-bit integer in two's complement.
 ///
 /// This class is also compiled into LLVM IR - so, it should not have cpp references like
 /// streams and boost.
-class ARROW_EXPORT BasicDecimal128 {
-  struct LittleEndianArrayTag {};
-
+class ARROW_EXPORT BasicDecimal128 : public GenericBasicDecimal<BasicDecimal128, 128> {
  public:
-  static constexpr int kBitWidth = 128;
   static constexpr int kMaxPrecision = 38;
   static constexpr int kMaxScale = 38;
 
-  // A constructor tag to introduce a little-endian encoded array
-  static constexpr LittleEndianArrayTag LittleEndianArray{};
+  using GenericBasicDecimal::GenericBasicDecimal;
+
+  constexpr BasicDecimal128() noexcept : GenericBasicDecimal() {}
 
   /// \brief Create a BasicDecimal128 from the two's complement representation.
 #if ARROW_LITTLE_ENDIAN
   constexpr BasicDecimal128(int64_t high, uint64_t low) noexcept
-      : low_bits_(low), high_bits_(high) {}
+      : BasicDecimal128(WordArray{low, static_cast<uint64_t>(high)}) {}
 #else
   constexpr BasicDecimal128(int64_t high, uint64_t low) noexcept
-      : high_bits_(high), low_bits_(low) {}
+      : BasicDecimal128(WordArray{static_cast<uint64_t>(high), low}) {}
 #endif
 
-  /// \brief Create a BasicDecimal256 from the two's complement representation.
-  ///
-  /// Input array is assumed to be in native endianness.
-#if ARROW_LITTLE_ENDIAN
-  constexpr BasicDecimal128(const std::array<uint64_t, 2>& array) noexcept
-      : low_bits_(array[0]), high_bits_(static_cast<int64_t>(array[1])) {}
-#else
-  constexpr BasicDecimal128(const std::array<uint64_t, 2>& array) noexcept
-      : high_bits_(static_cast<int64_t>(array[0])), low_bits_(array[1]) {}
-#endif
-
-  /// \brief Create a BasicDecimal128 from the two's complement representation.
-  ///
-  /// Input array is assumed to be in little endianness, with native endian elements.
-  BasicDecimal128(LittleEndianArrayTag, const std::array<uint64_t, 2>& array) noexcept
-      : BasicDecimal128(bit_util::little_endian::ToNative(array)) {}
-
-  /// \brief Empty constructor creates a BasicDecimal128 with a value of 0.
-  constexpr BasicDecimal128() noexcept : BasicDecimal128(0, 0) {}
-
   /// \brief Convert any integer value into a BasicDecimal128.
   template <typename T,
             typename = typename std::enable_if<
@@ -89,10 +156,6 @@ class ARROW_EXPORT BasicDecimal128 {
       : BasicDecimal128(value >= T{0} ? 0 : -1, static_cast<uint64_t>(value)) {  // NOLINT
   }
 
-  /// \brief Create a BasicDecimal128 from an array of bytes. Bytes are assumed to be in
-  /// native-endian byte order.
-  explicit BasicDecimal128(const uint8_t* bytes);
-
   /// \brief Negate the current value (in-place)
   BasicDecimal128& Negate();
 
@@ -137,59 +200,39 @@ class ARROW_EXPORT BasicDecimal128 {
   /// \brief Shift left by the given number of bits.
   BasicDecimal128& operator<<=(uint32_t bits);
 
+  BasicDecimal128 operator<<(uint32_t bits) const {
+    auto res = *this;
+    res <<= bits;
+    return res;
+  }
+
   /// \brief Shift right by the given number of bits. Negative values will
   BasicDecimal128& operator>>=(uint32_t bits);
 
-  /// \brief Get the high bits of the two's complement representation of the number.
-  inline constexpr int64_t high_bits() const { return high_bits_; }
-
-  /// \brief Get the low bits of the two's complement representation of the number.
-  inline constexpr uint64_t low_bits() const { return low_bits_; }
-
-  /// \brief Get the bits of the two's complement representation of the number.
-  ///
-  /// The 2 elements are in native endian order. The bits within each uint64_t element
-  /// are in native endian order. For example, on a little endian machine,
-  /// BasicDecimal128(123).native_endian_array() = {123, 0};
-  /// but on a big endian machine,
-  /// BasicDecimal128(123).native_endian_array() = {0, 123};
-  inline std::array<uint64_t, 2> native_endian_array() const {
-#if ARROW_LITTLE_ENDIAN
-    return {low_bits_, static_cast<uint64_t>(high_bits_)};
-#else
-    return {static_cast<uint64_t>(high_bits_), low_bits_};
-#endif
+  BasicDecimal128 operator>>(uint32_t bits) const {
+    auto res = *this;
+    res >>= bits;
+    return res;
   }
 
-  /// \brief Get the bits of the two's complement representation of the number.
-  ///
-  /// The 2 elements are in little endian order. However, the bits within each
-  /// uint64_t element are in native endian order.
-  /// For example, BasicDecimal128(123).little_endian_array() = {123, 0};
-  inline std::array<uint64_t, 2> little_endian_array() const {
-    return {low_bits_, static_cast<uint64_t>(high_bits_)};
-  }
-
-  inline const uint8_t* native_endian_bytes() const {
+  /// \brief Get the high bits of the two's complement representation of the number.
+  constexpr int64_t high_bits() const {
 #if ARROW_LITTLE_ENDIAN
-    return reinterpret_cast<const uint8_t*>(&low_bits_);
+    return static_cast<int64_t>(array_[1]);
 #else
-    return reinterpret_cast<const uint8_t*>(&high_bits_);
+    return static_cast<int64_t>(array_[0]);
 #endif
   }
 
-  inline uint8_t* mutable_native_endian_bytes() {
+  /// \brief Get the low bits of the two's complement representation of the number.
+  constexpr uint64_t low_bits() const {
 #if ARROW_LITTLE_ENDIAN
-    return reinterpret_cast<uint8_t*>(&low_bits_);
+    return array_[0];
 #else
-    return reinterpret_cast<uint8_t*>(&high_bits_);
+    return array_[1];
 #endif
   }
 
-  /// \brief Return the raw bytes of the value in native-endian byte order.
-  std::array<uint8_t, 16> ToBytes() const;
-  void ToBytes(uint8_t* out) const;
-
   /// \brief separate the integer and fractional parts for the given scale.
   void GetWholeAndFraction(int32_t scale, BasicDecimal128* whole,
                            BasicDecimal128* fraction) const;
@@ -218,9 +261,6 @@ class ARROW_EXPORT BasicDecimal128 {
   /// Return true if the number of significant digits is less or equal to `precision`.
   bool FitsInPrecision(int32_t precision) const;
 
-  // returns 1 for positive and zero decimal values, -1 for negative decimal values.
-  inline int64_t Sign() const { return 1 | (high_bits_ >> 63); }
-
   /// \brief count the number of leading binary zeroes.
   int32_t CountLeadingBinaryZeros() const;
 
@@ -231,24 +271,15 @@ class ARROW_EXPORT BasicDecimal128 {
   static BasicDecimal128 GetMaxValue(int32_t precision);
 
   /// \brief Get the maximum decimal value (is not a valid value).
-  static inline constexpr BasicDecimal128 GetMaxSentinel() {
+  static constexpr BasicDecimal128 GetMaxSentinel() {
     return BasicDecimal128(/*high=*/std::numeric_limits<int64_t>::max(),
                            /*low=*/std::numeric_limits<uint64_t>::max());
   }
   /// \brief Get the minimum decimal value (is not a valid value).
-  static inline constexpr BasicDecimal128 GetMinSentinel() {
+  static constexpr BasicDecimal128 GetMinSentinel() {
     return BasicDecimal128(/*high=*/std::numeric_limits<int64_t>::min(),
                            /*low=*/std::numeric_limits<uint64_t>::min());
   }
-
- private:
-#if ARROW_LITTLE_ENDIAN
-  uint64_t low_bits_;
-  int64_t high_bits_;
-#else
-  int64_t high_bits_;
-  uint64_t low_bits_;
-#endif
 };
 
 ARROW_EXPORT bool operator==(const BasicDecimal128& left, const BasicDecimal128& right);
@@ -271,58 +302,37 @@ ARROW_EXPORT BasicDecimal128 operator/(const BasicDecimal128& left,
 ARROW_EXPORT BasicDecimal128 operator%(const BasicDecimal128& left,
                                        const BasicDecimal128& right);
 
-class ARROW_EXPORT BasicDecimal256 {
+class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal<BasicDecimal256, 256> {
  private:
   // Due to a bug in clang, we have to declare the extend method prior to its
   // usage.
   template <typename T>
-  inline static constexpr uint64_t extend(T low_bits) noexcept {
+  static constexpr uint64_t extend(T low_bits) noexcept {
     return low_bits >= T() ? uint64_t{0} : ~uint64_t{0};
   }
 
-  struct LittleEndianArrayTag {};
-
  public:
-  static constexpr int kBitWidth = 256;
+  using GenericBasicDecimal::GenericBasicDecimal;
+
   static constexpr int kMaxPrecision = 76;
   static constexpr int kMaxScale = 76;
 
-  // A constructor tag to denote a little-endian encoded array
-  static constexpr LittleEndianArrayTag LittleEndianArray{};
-
-  /// \brief Create a BasicDecimal256 from the two's complement representation.
-  ///
-  /// Input array is assumed to be in native endianness.
-  constexpr BasicDecimal256(const std::array<uint64_t, 4>& array) noexcept
-      : array_(array) {}
-
-  /// \brief Create a BasicDecimal256 from the two's complement representation.
-  ///
-  /// Input array is assumed to be in little endianness, with native endian elements.
-  BasicDecimal256(LittleEndianArrayTag, const std::array<uint64_t, 4>& array) noexcept
-      : BasicDecimal256(bit_util::little_endian::ToNative(array)) {}
-
-  /// \brief Empty constructor creates a BasicDecimal256 with a value of 0.
-  constexpr BasicDecimal256() noexcept : array_({0, 0, 0, 0}) {}
+  constexpr BasicDecimal256() noexcept : GenericBasicDecimal() {}
 
   /// \brief Convert any integer value into a BasicDecimal256.
   template <typename T,
             typename = typename std::enable_if<
                 std::is_integral<T>::value && (sizeof(T) <= sizeof(uint64_t)), T>::type>
   constexpr BasicDecimal256(T value) noexcept
-      : array_(bit_util::little_endian::ToNative<uint64_t, 4>(
+      : BasicDecimal256(bit_util::little_endian::ToNative<uint64_t, 4>(
             {static_cast<uint64_t>(value), extend(value), extend(value),
              extend(value)})) {}
 
   explicit BasicDecimal256(const BasicDecimal128& value) noexcept
-      : array_(bit_util::little_endian::ToNative<uint64_t, 4>(
+      : BasicDecimal256(bit_util::little_endian::ToNative<uint64_t, 4>(
             {value.low_bits(), static_cast<uint64_t>(value.high_bits()),
              extend(value.high_bits()), extend(value.high_bits())})) {}
 
-  /// \brief Create a BasicDecimal256 from an array of bytes. Bytes are assumed to be in
-  /// native-endian byte order.
-  explicit BasicDecimal256(const uint8_t* bytes);
-
   /// \brief Negate the current value (in-place)
   BasicDecimal256& Negate();
 
@@ -338,42 +348,8 @@ class ARROW_EXPORT BasicDecimal256 {
   /// \brief Subtract a number from this one. The result is truncated to 256 bits.
   BasicDecimal256& operator-=(const BasicDecimal256& right);
 
-  /// \brief Get the bits of the two's complement representation of the number.
-  ///
-  /// The 4 elements are in native endian order. The bits within each uint64_t element
-  /// are in native endian order. For example, on a little endian machine,
-  ///   BasicDecimal256(123).native_endian_array() = {123, 0, 0, 0};
-  ///   BasicDecimal256(-2).native_endian_array() = {0xFF...FE, 0xFF...FF, 0xFF...FF,
-  /// 0xFF...FF}.
-  /// while on a big endian machine,
-  ///   BasicDecimal256(123).native_endian_array() = {0, 0, 0, 123};
-  ///   BasicDecimal256(-2).native_endian_array() = {0xFF...FF, 0xFF...FF, 0xFF...FF,
-  /// 0xFF...FE}.
-  inline const std::array<uint64_t, 4>& native_endian_array() const { return array_; }
-
-  /// \brief Get the bits of the two's complement representation of the number.
-  ///
-  /// The 4 elements are in little endian order. However, the bits within each
-  /// uint64_t element are in native endian order.
-  /// For example, BasicDecimal256(123).little_endian_array() = {123, 0};
-  inline const std::array<uint64_t, 4> little_endian_array() const {
-    return bit_util::little_endian::FromNative(array_);
-  }
-
-  inline const uint8_t* native_endian_bytes() const {
-    return reinterpret_cast<const uint8_t*>(array_.data());
-  }
-
-  inline uint8_t* mutable_native_endian_bytes() {
-    return reinterpret_cast<uint8_t*>(array_.data());
-  }
-
   /// \brief Get the lowest bits of the two's complement representation of the number.
-  inline uint64_t low_bits() const { return bit_util::little_endian::Make(array_)[0]; }
-
-  /// \brief Return the raw bytes of the value in native-endian byte order.
-  std::array<uint8_t, 32> ToBytes() const;
-  void ToBytes(uint8_t* out) const;
+  uint64_t low_bits() const { return bit_util::little_endian::Make(array_)[0]; }
 
   /// \brief Scale multiplier for given scale value.
   static const BasicDecimal256& GetScaleMultiplier(int32_t scale);
@@ -399,14 +375,6 @@ class ARROW_EXPORT BasicDecimal256 {
   /// Return true if the number of significant digits is less or equal to `precision`.
   bool FitsInPrecision(int32_t precision) const;
 
-  inline int64_t Sign() const {
-    return 1 | (static_cast<int64_t>(bit_util::little_endian::Make(array_)[3]) >> 63);
-  }
-
-  inline int64_t IsNegative() const {
-    return static_cast<int64_t>(bit_util::little_endian::Make(array_)[3]) < 0;
-  }
-
   /// \brief Multiply this number by another number. The result is truncated to 256 bits.
   BasicDecimal256& operator*=(const BasicDecimal256& right);
 
@@ -427,6 +395,12 @@ class ARROW_EXPORT BasicDecimal256 {
   /// \brief Shift left by the given number of bits.
   BasicDecimal256& operator<<=(uint32_t bits);
 
+  BasicDecimal256 operator<<(uint32_t bits) const {
+    auto res = *this;
+    res <<= bits;
+    return res;
+  }
+
   /// \brief In-place division.
   BasicDecimal256& operator/=(const BasicDecimal256& right);
 
@@ -434,7 +408,7 @@ class ARROW_EXPORT BasicDecimal256 {
   static BasicDecimal256 GetMaxValue(int32_t precision);
 
   /// \brief Get the maximum decimal value (is not a valid value).
-  static inline constexpr BasicDecimal256 GetMaxSentinel() {
+  static constexpr BasicDecimal256 GetMaxSentinel() {
 #if ARROW_LITTLE_ENDIAN
     return BasicDecimal256({std::numeric_limits<uint64_t>::max(),
                             std::numeric_limits<uint64_t>::max(),
@@ -448,7 +422,7 @@ class ARROW_EXPORT BasicDecimal256 {
 #endif
   }
   /// \brief Get the minimum decimal value (is not a valid value).
-  static inline constexpr BasicDecimal256 GetMinSentinel() {
+  static constexpr BasicDecimal256 GetMinSentinel() {
 #if ARROW_LITTLE_ENDIAN
     return BasicDecimal256(
         {0, 0, 0, static_cast<uint64_t>(std::numeric_limits<int64_t>::min())});
@@ -457,9 +431,6 @@ class ARROW_EXPORT BasicDecimal256 {
         {static_cast<uint64_t>(std::numeric_limits<int64_t>::min()), 0, 0, 0});
 #endif
   }
-
- private:
-  std::array<uint64_t, 4> array_;
 };
 
 ARROW_EXPORT inline bool operator==(const BasicDecimal256& left,
@@ -497,4 +468,5 @@ ARROW_EXPORT BasicDecimal256 operator*(const BasicDecimal256& left,
                                        const BasicDecimal256& right);
 ARROW_EXPORT BasicDecimal256 operator/(const BasicDecimal256& left,
                                        const BasicDecimal256& right);
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index d29ea65734ce2..d35223a1f4ad7 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -320,7 +320,7 @@ std::string Decimal128::ToIntegerString() const {
 Decimal128::operator int64_t() const {
   DCHECK(high_bits() == 0 || high_bits() == -1)
       << "Trying to cast a Decimal128 greater than the value range of a "
-         "int64_t. high_bits_ must be equal to 0 or -1, got: "
+         "int64_t; high_bits() must be equal to 0 or -1, got: "
       << high_bits();
   return static_cast<int64_t>(low_bits());
 }
diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc
index ca4f64228d171..2be9cfd9feb3f 100644
--- a/cpp/src/arrow/util/decimal_test.cc
+++ b/cpp/src/arrow/util/decimal_test.cc
@@ -81,6 +81,11 @@ Decimal256 Decimal256FromLE(const std::array<uint64_t, 4>& a) {
   return Decimal256(Decimal256::LittleEndianArray, a);
 }
 
+Decimal128 Decimal128FromInt128(int128_t value) {
+  return Decimal128(static_cast<int64_t>(value >> 64),
+                    static_cast<uint64_t>(value & 0xFFFFFFFFFFFFFFFFULL));
+}
+
 template <typename DecimalType>
 struct DecimalTraits {};
 
@@ -1260,11 +1265,6 @@ std::vector<CType> GetRandomNumbers(int32_t size) {
   return ret;
 }
 
-Decimal128 Decimal128FromInt128(int128_t value) {
-  return Decimal128(static_cast<int64_t>(value >> 64),
-                    static_cast<uint64_t>(value & 0xFFFFFFFFFFFFFFFFULL));
-}
-
 TEST(Decimal128Test, Multiply) {
   ASSERT_EQ(Decimal128(60501), Decimal128(301) * Decimal128(201));
 
@@ -1545,6 +1545,102 @@ TEST(Decimal128Test, FitsInPrecision) {
       Decimal128("-100000000000000000000000000000000000000").FitsInPrecision(38));
 }
 
+TEST(Decimal128Test, LeftShift) {
+  auto check = [](int128_t x, uint32_t bits) {
+    auto expected = Decimal128FromInt128(x * (int128_t{1} << bits));  // no UB if x < 0
+    auto actual = Decimal128FromInt128(x) << bits;
+    ASSERT_EQ(actual.low_bits(), expected.low_bits());
+    ASSERT_EQ(actual.high_bits(), expected.high_bits());
+  };
+
+  ASSERT_EQ(Decimal128("0"), Decimal128("0") << 0);
+  ASSERT_EQ(Decimal128("0"), Decimal128("0") << 1);
+  ASSERT_EQ(Decimal128("0"), Decimal128("0") << 63);
+  ASSERT_EQ(Decimal128("0"), Decimal128("0") << 127);
+
+  check(123, 0);
+  check(123, 1);
+  check(123, 63);
+  check(123, 64);
+  check(123, 120);
+
+  ASSERT_EQ(Decimal128("199999999999998"), Decimal128("99999999999999") << 1);
+  ASSERT_EQ(Decimal128("3435973836799965640261632"), Decimal128("99999999999999") << 35);
+  ASSERT_EQ(Decimal128("120892581961461708544797985370825293824"),
+            Decimal128("99999999999999") << 80);
+
+  ASSERT_EQ(Decimal128("1234567890123456789012"), Decimal128("1234567890123456789012")
+                                                      << 0);
+  ASSERT_EQ(Decimal128("2469135780246913578024"), Decimal128("1234567890123456789012")
+                                                      << 1);
+  ASSERT_EQ(Decimal128("88959991838777271103427858320412639232"),
+            Decimal128("1234567890123456789012") << 56);
+
+  check(-123, 0);
+  check(-123, 1);
+  check(-123, 63);
+  check(-123, 64);
+  check(-123, 120);
+
+  ASSERT_EQ(Decimal128("-199999999999998"), Decimal128("-99999999999999") << 1);
+  ASSERT_EQ(Decimal128("-3435973836799965640261632"), Decimal128("-99999999999999")
+                                                          << 35);
+  ASSERT_EQ(Decimal128("-120892581961461708544797985370825293824"),
+            Decimal128("-99999999999999") << 80);
+
+  ASSERT_EQ(Decimal128("-1234567890123456789012"), Decimal128("-1234567890123456789012")
+                                                       << 0);
+  ASSERT_EQ(Decimal128("-2469135780246913578024"), Decimal128("-1234567890123456789012")
+                                                       << 1);
+  ASSERT_EQ(Decimal128("-88959991838777271103427858320412639232"),
+            Decimal128("-1234567890123456789012") << 56);
+}
+
+TEST(Decimal128Test, RightShift) {
+  ASSERT_EQ(Decimal128("0"), Decimal128("0") >> 0);
+  ASSERT_EQ(Decimal128("0"), Decimal128("0") >> 1);
+  ASSERT_EQ(Decimal128("0"), Decimal128("0") >> 63);
+  ASSERT_EQ(Decimal128("0"), Decimal128("0") >> 127);
+
+  ASSERT_EQ(Decimal128("1"), Decimal128("1") >> 0);
+  ASSERT_EQ(Decimal128("0"), Decimal128("1") >> 1);
+  ASSERT_EQ(Decimal128("0"), Decimal128("1") >> 63);
+  ASSERT_EQ(Decimal128("0"), Decimal128("1") >> 127);
+
+  ASSERT_EQ(Decimal128("-1"), Decimal128("-1") >> 0);
+  ASSERT_EQ(Decimal128("-1"), Decimal128("-1") >> 1);
+  ASSERT_EQ(Decimal128("-1"), Decimal128("-1") >> 63);
+  ASSERT_EQ(Decimal128("-1"), Decimal128("-1") >> 127);
+
+  ASSERT_EQ(Decimal128("1096516"), Decimal128("1234567890123456789012") >> 50);
+  ASSERT_EQ(Decimal128("66"), Decimal128("1234567890123456789012") >> 64);
+  ASSERT_EQ(Decimal128("2"), Decimal128("1234567890123456789012") >> 69);
+  ASSERT_EQ(Decimal128("0"), Decimal128("1234567890123456789012") >> 71);
+  ASSERT_EQ(Decimal128("0"), Decimal128("1234567890123456789012") >> 127);
+
+  ASSERT_EQ(Decimal128("-1096517"), Decimal128("-1234567890123456789012") >> 50);
+  ASSERT_EQ(Decimal128("-67"), Decimal128("-1234567890123456789012") >> 64);
+  ASSERT_EQ(Decimal128("-3"), Decimal128("-1234567890123456789012") >> 69);
+  ASSERT_EQ(Decimal128("-1"), Decimal128("-1234567890123456789012") >> 71);
+  ASSERT_EQ(Decimal128("-1"), Decimal128("-1234567890123456789012") >> 127);
+}
+
+TEST(Decimal128Test, Negate) {
+  auto check = [](Decimal128 pos, Decimal128 neg) {
+    EXPECT_EQ(-pos, neg);
+    EXPECT_EQ(-neg, pos);
+  };
+
+  check(Decimal128(0, 0), Decimal128(0, 0));
+  check(Decimal128(0, 1), Decimal128(-1, 0xFFFFFFFFFFFFFFFFULL));
+  check(Decimal128(0, 2), Decimal128(-1, 0xFFFFFFFFFFFFFFFEULL));
+  check(Decimal128(0, 0x8000000000000000ULL), Decimal128(-1, 0x8000000000000000ULL));
+  check(Decimal128(0, 0xFFFFFFFFFFFFFFFFULL), Decimal128(-1, 1));
+  check(Decimal128(12, 0), Decimal128(-12, 0));
+  check(Decimal128(12, 1), Decimal128(-13, 0xFFFFFFFFFFFFFFFFULL));
+  check(Decimal128(12, 0xFFFFFFFFFFFFFFFFULL), Decimal128(-13, 1));
+}
+
 static constexpr std::array<uint64_t, 4> kSortedDecimal256Bits[] = {
     {0, 0, 0, 0x8000000000000000ULL},  // min
     {0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,