From 8050354a259d0249c6ee6c6604a55e6d2d8593de Mon Sep 17 00:00:00 2001 From: Yaron Gvili Date: Tue, 2 Aug 2022 16:01:16 -0400 Subject: [PATCH 01/12] ARROW-17289: [C++] Add type category membership checks --- cpp/src/arrow/type.cc | 80 ++++++++++++++++++++++++++++++++++++++ cpp/src/arrow/type.h | 28 +++++++++++++ cpp/src/arrow/type_test.cc | 22 +++++++++++ 3 files changed, 130 insertions(+) diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index efff07db6671f..c3c4c794716a0 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -2382,6 +2382,15 @@ std::vector> g_base_binary_types; std::vector> g_temporal_types; std::vector> g_interval_types; std::vector> g_primitive_types; +std::unordered_set> g_signed_int_types_set; +std::unordered_set> g_unsigned_int_types_set; +std::unordered_set> g_int_types_set; +std::unordered_set> g_floating_types_set; +std::unordered_set> g_numeric_types_set; +std::unordered_set> g_base_binary_types_set; +std::unordered_set> g_temporal_types_set; +std::unordered_set> g_interval_types_set; +std::unordered_set> g_primitive_types_set; std::once_flag static_data_initialized; template @@ -2389,6 +2398,11 @@ void Extend(const std::vector& values, std::vector* out) { out->insert(out->end(), values.begin(), values.end()); } +template +void Place(std::unordered_set& set, const std::vector& values) { + set.insert(values.begin(), values.end()); +} + void InitStaticData() { // Signed int types g_signed_int_types = {int8(), int16(), int32(), int64()}; @@ -2435,6 +2449,17 @@ void InitStaticData() { g_primitive_types = {null(), boolean(), date32(), date64()}; Extend(g_numeric_types, &g_primitive_types); Extend(g_base_binary_types, &g_primitive_types); + + // All sets + Place(g_signed_int_types_set, g_signed_int_types); + Place(g_unsigned_int_types_set, g_unsigned_int_types); + Place(g_int_types_set, g_int_types); + Place(g_floating_types_set, g_floating_types); + Place(g_numeric_types_set, g_numeric_types); + Place(g_base_binary_types_set, g_base_binary_types); + Place(g_temporal_types_set, g_temporal_types); + Place(g_interval_types_set, g_interval_types); + Place(g_primitive_types_set, g_primitive_types); } } // namespace @@ -2494,6 +2519,61 @@ const std::vector>& PrimitiveTypes() { return g_primitive_types; } +bool IsBaseBinaryType(std::shared_ptr type) { + std::call_once(static_data_initialized, InitStaticData); + return g_base_binary_types_set.find(type) != g_base_binary_types_set.end(); +} + +bool IsBinaryType(std::shared_ptr type) { + static DataTypeVector types = {binary(), large_binary()}; + return std::find(types.begin(), types.end(), type) != types.end(); +} + +bool IsStringType(std::shared_ptr type) { + static DataTypeVector types = {utf8(), large_utf8()}; + return std::find(types.begin(), types.end(), type) != types.end(); +} + +bool IsSignedIntType(std::shared_ptr type) { + std::call_once(static_data_initialized, InitStaticData); + return g_signed_int_types_set.find(type) != g_signed_int_types_set.end(); +} + +bool IsUnsignedIntType(std::shared_ptr type) { + std::call_once(static_data_initialized, InitStaticData); + return g_unsigned_int_types_set.find(type) != g_unsigned_int_types_set.end(); +} + +bool IsIntType(std::shared_ptr type) { + std::call_once(static_data_initialized, InitStaticData); + return g_int_types_set.find(type) != g_int_types_set.end(); +} + +bool IsFloatingPointType(std::shared_ptr type) { + std::call_once(static_data_initialized, InitStaticData); + return g_floating_types_set.find(type) != g_floating_types_set.end(); +} + +bool IsNumericType(std::shared_ptr type) { + std::call_once(static_data_initialized, InitStaticData); + return g_numeric_types_set.find(type) != g_numeric_types_set.end(); +} + +bool IsTemporalType(std::shared_ptr type) { + std::call_once(static_data_initialized, InitStaticData); + return g_temporal_types_set.find(type) != g_temporal_types_set.end(); +} + +bool IsIntervalType(std::shared_ptr type) { + std::call_once(static_data_initialized, InitStaticData); + return g_interval_types_set.find(type) != g_interval_types_set.end(); +} + +bool IsPrimitiveType(std::shared_ptr type) { + std::call_once(static_data_initialized, InitStaticData); + return g_primitive_types_set.find(type) != g_primitive_types_set.end(); +} + const std::vector& TimeUnit::values() { static std::vector units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}; diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index f3ac2d62d8268..bb9cf2c48c7a2 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -2151,4 +2151,32 @@ const std::vector>& IntervalTypes(); ARROW_EXPORT const std::vector>& PrimitiveTypes(); +ARROW_EXPORT +bool IsSignedIntType(std::shared_ptr); +ARROW_EXPORT +bool IsUnsignedIntType(std::shared_ptr); +ARROW_EXPORT +bool IsIntType(std::shared_ptr); +ARROW_EXPORT +bool IsFloatingPointType(std::shared_ptr); +// Number types without boolean +ARROW_EXPORT +bool IsNumericType(std::shared_ptr); +// Binary and string-like types (except fixed-size binary) +ARROW_EXPORT +bool IsBaseBinaryType(std::shared_ptr); +ARROW_EXPORT +bool IsBinaryType(std::shared_ptr); +ARROW_EXPORT +bool IsStringType(std::shared_ptr); +// Temporal types including time and timestamps for each unit +ARROW_EXPORT +bool IsTemporalType(std::shared_ptr); +// Interval types +ARROW_EXPORT +bool IsIntervalType(std::shared_ptr); +// Integer, floating point, base binary, and temporal +ARROW_EXPORT +bool IsPrimitiveType(std::shared_ptr); + } // namespace arrow diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc index c7ac5f6c7f22e..d72d5981b9ba4 100644 --- a/cpp/src/arrow/type_test.cc +++ b/cpp/src/arrow/type_test.cc @@ -1821,4 +1821,26 @@ TEST(TypesTest, TestDecimalEquals) { AssertTypeNotEqual(t5, t10); } +static void test_membership( + const std::vector>& types, + std::function)> membership_check) { + for (auto type : types) { + ASSERT_TRUE(membership_check(type)); + } +} + +TEST(TypesTest, TestMembership) { + test_membership(BaseBinaryTypes(), IsBaseBinaryType); + test_membership(BinaryTypes(), IsBinaryType); + test_membership(StringTypes(), IsStringType); + test_membership(SignedIntTypes(), IsSignedIntType); + test_membership(UnsignedIntTypes(), IsUnsignedIntType); + test_membership(IntTypes(), IsIntType); + test_membership(FloatingPointTypes(), IsFloatingPointType); + test_membership(NumericTypes(), IsNumericType); + test_membership(TemporalTypes(), IsTemporalType); + test_membership(IntervalTypes(), IsIntervalType); + test_membership(PrimitiveTypes(), IsPrimitiveType); +} + } // namespace arrow From f38ec91829cf198e6bb7452cf7579f757e6b688c Mon Sep 17 00:00:00 2001 From: Yaron Gvili Date: Wed, 3 Aug 2022 12:02:35 -0400 Subject: [PATCH 02/12] use type id, improve test coverage of types --- cpp/src/arrow/type.cc | 80 ---------------------- cpp/src/arrow/type.h | 28 -------- cpp/src/arrow/type_test.cc | 47 ++++++++----- cpp/src/arrow/type_traits.h | 133 ++++++++++++++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 124 deletions(-) diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index c3c4c794716a0..efff07db6671f 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -2382,15 +2382,6 @@ std::vector> g_base_binary_types; std::vector> g_temporal_types; std::vector> g_interval_types; std::vector> g_primitive_types; -std::unordered_set> g_signed_int_types_set; -std::unordered_set> g_unsigned_int_types_set; -std::unordered_set> g_int_types_set; -std::unordered_set> g_floating_types_set; -std::unordered_set> g_numeric_types_set; -std::unordered_set> g_base_binary_types_set; -std::unordered_set> g_temporal_types_set; -std::unordered_set> g_interval_types_set; -std::unordered_set> g_primitive_types_set; std::once_flag static_data_initialized; template @@ -2398,11 +2389,6 @@ void Extend(const std::vector& values, std::vector* out) { out->insert(out->end(), values.begin(), values.end()); } -template -void Place(std::unordered_set& set, const std::vector& values) { - set.insert(values.begin(), values.end()); -} - void InitStaticData() { // Signed int types g_signed_int_types = {int8(), int16(), int32(), int64()}; @@ -2449,17 +2435,6 @@ void InitStaticData() { g_primitive_types = {null(), boolean(), date32(), date64()}; Extend(g_numeric_types, &g_primitive_types); Extend(g_base_binary_types, &g_primitive_types); - - // All sets - Place(g_signed_int_types_set, g_signed_int_types); - Place(g_unsigned_int_types_set, g_unsigned_int_types); - Place(g_int_types_set, g_int_types); - Place(g_floating_types_set, g_floating_types); - Place(g_numeric_types_set, g_numeric_types); - Place(g_base_binary_types_set, g_base_binary_types); - Place(g_temporal_types_set, g_temporal_types); - Place(g_interval_types_set, g_interval_types); - Place(g_primitive_types_set, g_primitive_types); } } // namespace @@ -2519,61 +2494,6 @@ const std::vector>& PrimitiveTypes() { return g_primitive_types; } -bool IsBaseBinaryType(std::shared_ptr type) { - std::call_once(static_data_initialized, InitStaticData); - return g_base_binary_types_set.find(type) != g_base_binary_types_set.end(); -} - -bool IsBinaryType(std::shared_ptr type) { - static DataTypeVector types = {binary(), large_binary()}; - return std::find(types.begin(), types.end(), type) != types.end(); -} - -bool IsStringType(std::shared_ptr type) { - static DataTypeVector types = {utf8(), large_utf8()}; - return std::find(types.begin(), types.end(), type) != types.end(); -} - -bool IsSignedIntType(std::shared_ptr type) { - std::call_once(static_data_initialized, InitStaticData); - return g_signed_int_types_set.find(type) != g_signed_int_types_set.end(); -} - -bool IsUnsignedIntType(std::shared_ptr type) { - std::call_once(static_data_initialized, InitStaticData); - return g_unsigned_int_types_set.find(type) != g_unsigned_int_types_set.end(); -} - -bool IsIntType(std::shared_ptr type) { - std::call_once(static_data_initialized, InitStaticData); - return g_int_types_set.find(type) != g_int_types_set.end(); -} - -bool IsFloatingPointType(std::shared_ptr type) { - std::call_once(static_data_initialized, InitStaticData); - return g_floating_types_set.find(type) != g_floating_types_set.end(); -} - -bool IsNumericType(std::shared_ptr type) { - std::call_once(static_data_initialized, InitStaticData); - return g_numeric_types_set.find(type) != g_numeric_types_set.end(); -} - -bool IsTemporalType(std::shared_ptr type) { - std::call_once(static_data_initialized, InitStaticData); - return g_temporal_types_set.find(type) != g_temporal_types_set.end(); -} - -bool IsIntervalType(std::shared_ptr type) { - std::call_once(static_data_initialized, InitStaticData); - return g_interval_types_set.find(type) != g_interval_types_set.end(); -} - -bool IsPrimitiveType(std::shared_ptr type) { - std::call_once(static_data_initialized, InitStaticData); - return g_primitive_types_set.find(type) != g_primitive_types_set.end(); -} - const std::vector& TimeUnit::values() { static std::vector units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}; diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index bb9cf2c48c7a2..f3ac2d62d8268 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -2151,32 +2151,4 @@ const std::vector>& IntervalTypes(); ARROW_EXPORT const std::vector>& PrimitiveTypes(); -ARROW_EXPORT -bool IsSignedIntType(std::shared_ptr); -ARROW_EXPORT -bool IsUnsignedIntType(std::shared_ptr); -ARROW_EXPORT -bool IsIntType(std::shared_ptr); -ARROW_EXPORT -bool IsFloatingPointType(std::shared_ptr); -// Number types without boolean -ARROW_EXPORT -bool IsNumericType(std::shared_ptr); -// Binary and string-like types (except fixed-size binary) -ARROW_EXPORT -bool IsBaseBinaryType(std::shared_ptr); -ARROW_EXPORT -bool IsBinaryType(std::shared_ptr); -ARROW_EXPORT -bool IsStringType(std::shared_ptr); -// Temporal types including time and timestamps for each unit -ARROW_EXPORT -bool IsTemporalType(std::shared_ptr); -// Interval types -ARROW_EXPORT -bool IsIntervalType(std::shared_ptr); -// Integer, floating point, base binary, and temporal -ARROW_EXPORT -bool IsPrimitiveType(std::shared_ptr); - } // namespace arrow diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc index d72d5981b9ba4..6a99d7cedb6b0 100644 --- a/cpp/src/arrow/type_test.cc +++ b/cpp/src/arrow/type_test.cc @@ -1821,26 +1821,41 @@ TEST(TypesTest, TestDecimalEquals) { AssertTypeNotEqual(t5, t10); } -static void test_membership( - const std::vector>& types, - std::function)> membership_check) { - for (auto type : types) { - ASSERT_TRUE(membership_check(type)); +static void test_membership(const std::vector>& all_types, + const std::vector>& member_types, + std::function membership_check) { + for (auto type : all_types) { + bool expect = + std::find(member_types.begin(), member_types.end(), type) != member_types.end(); + ASSERT_EQ(expect, membership_check(*type)); } } TEST(TypesTest, TestMembership) { - test_membership(BaseBinaryTypes(), IsBaseBinaryType); - test_membership(BinaryTypes(), IsBinaryType); - test_membership(StringTypes(), IsStringType); - test_membership(SignedIntTypes(), IsSignedIntType); - test_membership(UnsignedIntTypes(), IsUnsignedIntType); - test_membership(IntTypes(), IsIntType); - test_membership(FloatingPointTypes(), IsFloatingPointType); - test_membership(NumericTypes(), IsNumericType); - test_membership(TemporalTypes(), IsTemporalType); - test_membership(IntervalTypes(), IsIntervalType); - test_membership(PrimitiveTypes(), IsPrimitiveType); + std::vector> all_types; + for (auto type : NumericTypes()) { + all_types.push_back(type); + } + for (auto type : TemporalTypes()) { + all_types.push_back(type); + } + for (auto type : IntervalTypes()) { + all_types.push_back(type); + } + for (auto type : PrimitiveTypes()) { + all_types.push_back(type); + } + test_membership(all_types, BaseBinaryTypes(), IsBaseBinaryType); + test_membership(all_types, BinaryTypes(), IsBinaryType); + test_membership(all_types, StringTypes(), IsStringType); + test_membership(all_types, SignedIntTypes(), IsSignedIntType); + test_membership(all_types, UnsignedIntTypes(), IsUnsignedIntType); + test_membership(all_types, IntTypes(), IsIntType); + test_membership(all_types, FloatingPointTypes(), IsFloatingPointType); + test_membership(all_types, NumericTypes(), IsNumericType); + test_membership(all_types, TemporalTypes(), IsTemporalType); + test_membership(all_types, IntervalTypes(), IsIntervalType); + test_membership(all_types, PrimitiveTypes(), IsPrimitiveType); } } // namespace arrow diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 221b35ce57323..30804f6f22ed0 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -901,6 +901,26 @@ static inline bool is_floating(Type::type type_id) { return false; } +static inline bool is_numeric(Type::type type_id) { + switch (type_id) { + case Type::UINT8: + case Type::INT8: + case Type::UINT16: + case Type::INT16: + case Type::UINT32: + case Type::INT32: + case Type::UINT64: + case Type::INT64: + case Type::HALF_FLOAT: + case Type::FLOAT: + case Type::DOUBLE: + return true; + default: + break; + } + return false; +} + static inline bool is_decimal(Type::type type_id) { switch (type_id) { case Type::DECIMAL128: @@ -942,6 +962,34 @@ static inline bool is_primitive(Type::type type_id) { return false; } +static inline bool is_primitive_like(Type::type type_id) { + switch (type_id) { + case Type::NA: + case Type::BOOL: + case Type::UINT8: + case Type::INT8: + case Type::UINT16: + case Type::INT16: + case Type::UINT32: + case Type::INT32: + case Type::UINT64: + case Type::INT64: + case Type::HALF_FLOAT: + case Type::FLOAT: + case Type::DOUBLE: + case Type::DATE32: + case Type::DATE64: + case Type::BINARY: + case Type::STRING: + case Type::LARGE_BINARY: + case Type::LARGE_STRING: + return true; + default: + break; + } + return false; +} + static inline bool is_base_binary_like(Type::type type_id) { switch (type_id) { case Type::BINARY: @@ -977,6 +1025,54 @@ static inline bool is_large_binary_like(Type::type type_id) { return false; } +static inline bool is_binary(Type::type type_id) { + switch (type_id) { + case Type::BINARY: + case Type::LARGE_BINARY: + return true; + default: + break; + } + return false; +} + +static inline bool is_string(Type::type type_id) { + switch (type_id) { + case Type::STRING: + case Type::LARGE_STRING: + return true; + default: + break; + } + return false; +} + +static inline bool is_temporal(Type::type type_id) { + switch (type_id) { + case Type::DATE32: + case Type::DATE64: + case Type::TIME32: + case Type::TIME64: + case Type::TIMESTAMP: + return true; + default: + break; + } + return false; +} + +static inline bool is_interval(Type::type type_id) { + switch (type_id) { + case Type::INTERVAL_MONTHS: + case Type::INTERVAL_DAY_TIME: + case Type::INTERVAL_MONTH_DAY_NANO: + return true; + default: + break; + } + return false; +} + static inline bool is_dictionary(Type::type type_id) { return type_id == Type::DICTIONARY; } @@ -1105,4 +1201,41 @@ static inline int offset_bit_width(Type::type type_id) { /// @} +static inline bool IsSignedIntType(const DataType& type) { + return is_signed_integer(type.id()); +} + +static inline bool IsUnsignedIntType(const DataType& type) { + return is_unsigned_integer(type.id()); +} + +static inline bool IsIntType(const DataType& type) { return is_integer(type.id()); } + +static inline bool IsFloatingPointType(const DataType& type) { + return is_floating(type.id()); +} + +// Number types without boolean +static inline bool IsNumericType(const DataType& type) { return is_numeric(type.id()); } + +// Binary and string-like types (except fixed-size binary) +static inline bool IsBaseBinaryType(const DataType& type) { + return is_base_binary_like(type.id()); +} + +static inline bool IsBinaryType(const DataType& type) { return is_binary(type.id()); } + +static inline bool IsStringType(const DataType& type) { return is_string(type.id()); } + +// Temporal types including time and timestamps for each unit +static inline bool IsTemporalType(const DataType& type) { return is_temporal(type.id()); } + +// Interval types +static inline bool IsIntervalType(const DataType& type) { return is_interval(type.id()); } + +// Integer, floating point, base binary, and temporal +static inline bool IsPrimitiveType(const DataType& type) { + return is_primitive_like(type.id()); +} + } // namespace arrow From c3d4d35d59f6820a386d162fae887ff7f2d86d4c Mon Sep 17 00:00:00 2001 From: Yaron Gvili Date: Thu, 4 Aug 2022 04:41:59 -0400 Subject: [PATCH 03/12] add docs --- cpp/src/arrow/type.h | 16 +++++--- cpp/src/arrow/type_traits.h | 75 +++++++++++++++++++++++++++++++++---- 2 files changed, 79 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index f3ac2d62d8268..50c02b1a6d46a 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -2123,31 +2123,37 @@ std::string ToString(TimeUnit::type unit); // Helpers to get instances of data types based on general categories +/// \brief Signed integer types ARROW_EXPORT const std::vector>& SignedIntTypes(); +/// \brief Unsigned integer types ARROW_EXPORT const std::vector>& UnsignedIntTypes(); +/// \brief Signed and unsigned integer types ARROW_EXPORT const std::vector>& IntTypes(); +/// \brief Floating point types ARROW_EXPORT const std::vector>& FloatingPointTypes(); -// Number types without boolean +/// \brief Number types without boolean - integer and floating point types ARROW_EXPORT const std::vector>& NumericTypes(); -// Binary and string-like types (except fixed-size binary) +/// \brief Binary and string-like types (except fixed-size binary) ARROW_EXPORT const std::vector>& BaseBinaryTypes(); +/// \brief Binary and large-binary types ARROW_EXPORT const std::vector>& BinaryTypes(); +/// \brief String and large-string types ARROW_EXPORT const std::vector>& StringTypes(); -// Temporal types including time and timestamps for each unit +/// \brief Temporal types including date, time and timestamps for each unit ARROW_EXPORT const std::vector>& TemporalTypes(); -// Interval types +/// \brief Interval types ARROW_EXPORT const std::vector>& IntervalTypes(); -// Integer, floating point, base binary, and temporal +/// \brief Numeric, base binary, date, boolean and null types ARROW_EXPORT const std::vector>& PrimitiveTypes(); diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 30804f6f22ed0..2f04f84324e82 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -1199,43 +1199,104 @@ static inline int offset_bit_width(Type::type type_id) { return 0; } -/// @} - +/// \brief Check for a signed integer type +/// +/// \param[in] type the type to check +/// \return whether type is a signed integer type +/// +/// Convenience for checking membership of a type in SignedIntTypes() static inline bool IsSignedIntType(const DataType& type) { return is_signed_integer(type.id()); } +/// \brief Check for an unsigned integer type +/// +/// \param[in] type the type to check +/// \return whether type is an unsigned integer type +/// +/// Convenience for checking membership of a type in UnsignedIntTypes() static inline bool IsUnsignedIntType(const DataType& type) { return is_unsigned_integer(type.id()); } +/// \brief Check for an integer type (signed or unsigned integer type) +/// +/// \param[in] type the type to check +/// \return whether type is an integer type +/// +/// Convenience for checking membership of a type in IntTypes() static inline bool IsIntType(const DataType& type) { return is_integer(type.id()); } +/// \brief Check for a floating point type +/// +/// \param[in] type the type to check +/// \return whether type is a floating point type +/// +/// Convenience for checking membership of a type in FloatingPointTypes() static inline bool IsFloatingPointType(const DataType& type) { return is_floating(type.id()); } -// Number types without boolean +/// \brief Check for a numeric type (number except boolean type) +/// +/// \param[in] type the type to check +/// \return whether type is a numeric type +/// +/// Convenience for checking membership of a type in NumericTypes() static inline bool IsNumericType(const DataType& type) { return is_numeric(type.id()); } -// Binary and string-like types (except fixed-size binary) +/// \brief Check for a binary or string-like type (except fixed-size binary) +/// +/// \param[in] type the type to check +/// \return whether type is a binary or string-like type +/// +/// Convenience for checking membership of a type in BaseBinaryTypes() static inline bool IsBaseBinaryType(const DataType& type) { return is_base_binary_like(type.id()); } +/// \brief Check for a binary type +/// +/// \param[in] type the type to check +/// \return whether type is a binary type +/// +/// Convenience for checking membership of a type in BinaryTypes() static inline bool IsBinaryType(const DataType& type) { return is_binary(type.id()); } +/// \brief Check for a string type +/// +/// \param[in] type the type to check +/// \return whether type is a string type +/// +/// Convenience for checking membership of a type in StringTypes() static inline bool IsStringType(const DataType& type) { return is_string(type.id()); } -// Temporal types including time and timestamps for each unit +/// \brief Check for a temporal type, including time and timestamps for each unit +/// +/// \param[in] type the type to check +/// \return whether type is a temporal type +/// +/// Convenience for checking membership of a type in TemporalTypes() static inline bool IsTemporalType(const DataType& type) { return is_temporal(type.id()); } -// Interval types +/// \brief Check for an interval type +/// +/// \param[in] type the type to check +/// \return whether type is a interval type +/// +/// Convenience for checking membership of a type in IntervalTypes() static inline bool IsIntervalType(const DataType& type) { return is_interval(type.id()); } -// Integer, floating point, base binary, and temporal +/// \brief Check for a primitive type - numeric, base binary, date, boolean or null +/// +/// \param[in] type the type to check +/// \return whether type is a primitive type +/// +/// Convenience for checking membership of a type in PrimitiveTypes() static inline bool IsPrimitiveType(const DataType& type) { return is_primitive_like(type.id()); } +/// @} + } // namespace arrow From ba232b783a48d7cce1d30b6d9309676d9e5ca7e9 Mon Sep 17 00:00:00 2001 From: Yaron Gvili Date: Thu, 4 Aug 2022 10:06:11 -0400 Subject: [PATCH 04/12] fix type predicates --- cpp/src/arrow/type_test.cc | 47 ++++--- cpp/src/arrow/type_traits.h | 241 +++++++++++++++++++++++++++++++----- 2 files changed, 237 insertions(+), 51 deletions(-) diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc index 6a99d7cedb6b0..3362183804d63 100644 --- a/cpp/src/arrow/type_test.cc +++ b/cpp/src/arrow/type_test.cc @@ -1821,15 +1821,10 @@ TEST(TypesTest, TestDecimalEquals) { AssertTypeNotEqual(t5, t10); } -static void test_membership(const std::vector>& all_types, - const std::vector>& member_types, - std::function membership_check) { - for (auto type : all_types) { - bool expect = - std::find(member_types.begin(), member_types.end(), type) != member_types.end(); - ASSERT_EQ(expect, membership_check(*type)); +#define TEST_PREDICATE(all_types, type_predicate) \ + for (auto type : all_types) { \ + ASSERT_EQ(type_predicate(type->id()), type_predicate(*type)); \ } -} TEST(TypesTest, TestMembership) { std::vector> all_types; @@ -1845,17 +1840,29 @@ TEST(TypesTest, TestMembership) { for (auto type : PrimitiveTypes()) { all_types.push_back(type); } - test_membership(all_types, BaseBinaryTypes(), IsBaseBinaryType); - test_membership(all_types, BinaryTypes(), IsBinaryType); - test_membership(all_types, StringTypes(), IsStringType); - test_membership(all_types, SignedIntTypes(), IsSignedIntType); - test_membership(all_types, UnsignedIntTypes(), IsUnsignedIntType); - test_membership(all_types, IntTypes(), IsIntType); - test_membership(all_types, FloatingPointTypes(), IsFloatingPointType); - test_membership(all_types, NumericTypes(), IsNumericType); - test_membership(all_types, TemporalTypes(), IsTemporalType); - test_membership(all_types, IntervalTypes(), IsIntervalType); - test_membership(all_types, PrimitiveTypes(), IsPrimitiveType); -} + TEST_PREDICATE(all_types, is_integer); + TEST_PREDICATE(all_types, is_signed_integer); + TEST_PREDICATE(all_types, is_unsigned_integer); + TEST_PREDICATE(all_types, is_floating); + TEST_PREDICATE(all_types, is_numeric); + TEST_PREDICATE(all_types, is_decimal); + TEST_PREDICATE(all_types, is_primitive); + TEST_PREDICATE(all_types, is_primitive_like); + TEST_PREDICATE(all_types, is_base_binary_like); + TEST_PREDICATE(all_types, is_binary_like); + TEST_PREDICATE(all_types, is_large_binary_like); + TEST_PREDICATE(all_types, is_binary); + TEST_PREDICATE(all_types, is_string); + TEST_PREDICATE(all_types, is_temporal); + TEST_PREDICATE(all_types, is_interval); + TEST_PREDICATE(all_types, is_dictionary); + TEST_PREDICATE(all_types, is_fixed_size_binary); + TEST_PREDICATE(all_types, is_fixed_width); + TEST_PREDICATE(all_types, is_list_like); + TEST_PREDICATE(all_types, is_nested); + TEST_PREDICATE(all_types, is_union); +} + +#undef TEST_PREDICATE } // namespace arrow diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 2f04f84324e82..a38b885685c4c 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -846,6 +846,11 @@ using enable_if_physical_floating_point = /// \addtogroup runtime-type-predicates /// @{ + +/// \brief Check for an integer type (signed or unsigned) +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is an integer type one static inline bool is_integer(Type::type type_id) { switch (type_id) { case Type::UINT8: @@ -863,6 +868,10 @@ static inline bool is_integer(Type::type type_id) { return false; } +/// \brief Check for a signed integer type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a signed integer type one static inline bool is_signed_integer(Type::type type_id) { switch (type_id) { case Type::INT8: @@ -876,6 +885,10 @@ static inline bool is_signed_integer(Type::type type_id) { return false; } +/// \brief Check for an unsigned integer type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is an unsigned integer type one static inline bool is_unsigned_integer(Type::type type_id) { switch (type_id) { case Type::UINT8: @@ -889,6 +902,10 @@ static inline bool is_unsigned_integer(Type::type type_id) { return false; } +/// \brief Check for a floating point type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a floating point type one static inline bool is_floating(Type::type type_id) { switch (type_id) { case Type::HALF_FLOAT: @@ -901,6 +918,10 @@ static inline bool is_floating(Type::type type_id) { return false; } +/// \brief Check for a numeric type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a numeric type one static inline bool is_numeric(Type::type type_id) { switch (type_id) { case Type::UINT8: @@ -921,6 +942,10 @@ static inline bool is_numeric(Type::type type_id) { return false; } +/// \brief Check for a decimal type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a decimal type one static inline bool is_decimal(Type::type type_id) { switch (type_id) { case Type::DECIMAL128: @@ -932,6 +957,10 @@ static inline bool is_decimal(Type::type type_id) { return false; } +/// \brief Check for a primitive type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a primitive type one static inline bool is_primitive(Type::type type_id) { switch (type_id) { case Type::BOOL: @@ -962,6 +991,10 @@ static inline bool is_primitive(Type::type type_id) { return false; } +/// \brief Check for a primitive-like type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a primitive-like type one static inline bool is_primitive_like(Type::type type_id) { switch (type_id) { case Type::NA: @@ -990,6 +1023,10 @@ static inline bool is_primitive_like(Type::type type_id) { return false; } +/// \brief Check for a base-binary-like type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a base-binary-like type one static inline bool is_base_binary_like(Type::type type_id) { switch (type_id) { case Type::BINARY: @@ -1003,6 +1040,10 @@ static inline bool is_base_binary_like(Type::type type_id) { return false; } +/// \brief Check for a binary-like type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a binary-like type one static inline bool is_binary_like(Type::type type_id) { switch (type_id) { case Type::BINARY: @@ -1014,6 +1055,10 @@ static inline bool is_binary_like(Type::type type_id) { return false; } +/// \brief Check for a large-binary-like type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a large-binary-like type one static inline bool is_large_binary_like(Type::type type_id) { switch (type_id) { case Type::LARGE_BINARY: @@ -1025,6 +1070,10 @@ static inline bool is_large_binary_like(Type::type type_id) { return false; } +/// \brief Check for a binary type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a binary type one static inline bool is_binary(Type::type type_id) { switch (type_id) { case Type::BINARY: @@ -1036,6 +1085,10 @@ static inline bool is_binary(Type::type type_id) { return false; } +/// \brief Check for a string type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a string type one static inline bool is_string(Type::type type_id) { switch (type_id) { case Type::STRING: @@ -1047,6 +1100,10 @@ static inline bool is_string(Type::type type_id) { return false; } +/// \brief Check for a temporal type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a temporal type one static inline bool is_temporal(Type::type type_id) { switch (type_id) { case Type::DATE32: @@ -1061,6 +1118,10 @@ static inline bool is_temporal(Type::type type_id) { return false; } +/// \brief Check for an interval type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is an interval type one static inline bool is_interval(Type::type type_id) { switch (type_id) { case Type::INTERVAL_MONTHS: @@ -1073,10 +1134,18 @@ static inline bool is_interval(Type::type type_id) { return false; } +/// \brief Check for a dictionary type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a dictionary type one static inline bool is_dictionary(Type::type type_id) { return type_id == Type::DICTIONARY; } +/// \brief Check for a fixed-size-binary type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a fixed-size-binary type one static inline bool is_fixed_size_binary(Type::type type_id) { switch (type_id) { case Type::DECIMAL128: @@ -1089,6 +1158,10 @@ static inline bool is_fixed_size_binary(Type::type type_id) { return false; } +/// \brief Check for a fixed-width type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a fixed-width type one static inline bool is_fixed_width(Type::type type_id) { return is_primitive(type_id) || is_dictionary(type_id) || is_fixed_size_binary(type_id); } @@ -1141,6 +1214,10 @@ static inline int bit_width(Type::type type_id) { return 0; } +/// \brief Check for a list-like type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a list-like type one static inline bool is_list_like(Type::type type_id) { switch (type_id) { case Type::LIST: @@ -1154,6 +1231,10 @@ static inline bool is_list_like(Type::type type_id) { return false; } +/// \brief Check for a nested type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a nested type one static inline bool is_nested(Type::type type_id) { switch (type_id) { case Type::LIST: @@ -1170,6 +1251,10 @@ static inline bool is_nested(Type::type type_id) { return false; } +/// \brief Check for a union type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a union type one static inline bool is_union(Type::type type_id) { switch (type_id) { case Type::SPARSE_UNION: @@ -1199,13 +1284,21 @@ static inline int offset_bit_width(Type::type type_id) { return 0; } +/// \brief Check for an integer type (signed or unsigned) +/// +/// \param[in] type the type to check +/// \return whether type is an integer type +/// +/// Convenience for checking using the types' id +static inline bool is_integer(const DataType& type) { return is_integer(type.id()); } + /// \brief Check for a signed integer type /// /// \param[in] type the type to check /// \return whether type is a signed integer type /// -/// Convenience for checking membership of a type in SignedIntTypes() -static inline bool IsSignedIntType(const DataType& type) { +/// Convenience for checking using the types' id +static inline bool is_signed_integer(const DataType& type) { return is_signed_integer(type.id()); } @@ -1214,26 +1307,18 @@ static inline bool IsSignedIntType(const DataType& type) { /// \param[in] type the type to check /// \return whether type is an unsigned integer type /// -/// Convenience for checking membership of a type in UnsignedIntTypes() -static inline bool IsUnsignedIntType(const DataType& type) { +/// Convenience for checking using the types' id +static inline bool is_unsigned_integer(const DataType& type) { return is_unsigned_integer(type.id()); } -/// \brief Check for an integer type (signed or unsigned integer type) -/// -/// \param[in] type the type to check -/// \return whether type is an integer type -/// -/// Convenience for checking membership of a type in IntTypes() -static inline bool IsIntType(const DataType& type) { return is_integer(type.id()); } - /// \brief Check for a floating point type /// /// \param[in] type the type to check /// \return whether type is a floating point type /// -/// Convenience for checking membership of a type in FloatingPointTypes() -static inline bool IsFloatingPointType(const DataType& type) { +/// Convenience for checking using the types' id +static inline bool is_floating(const DataType& type) { return is_floating(type.id()); } @@ -1242,61 +1327,155 @@ static inline bool IsFloatingPointType(const DataType& type) { /// \param[in] type the type to check /// \return whether type is a numeric type /// -/// Convenience for checking membership of a type in NumericTypes() -static inline bool IsNumericType(const DataType& type) { return is_numeric(type.id()); } +/// Convenience for checking using the types' id +static inline bool is_numeric(const DataType& type) { return is_numeric(type.id()); } + +/// \brief Check for a decimal type +/// +/// \param[in] type the type to check +/// \return whether type is a decimal type +/// +/// Convenience for checking using the types' id +static inline bool is_decimal(const DataType& type) { return is_decimal(type.id()); } + +/// \brief Check for a primitive type +/// +/// \param[in] type the type to check +/// \return whether type is a primitive type +/// +/// Convenience for checking using the types' id +static inline bool is_primitive(const DataType& type) { + return is_primitive(type.id()); +} + +/// \brief Check for a primitive-like type +/// +/// \param[in] type the type to check +/// \return whether type is a primitive-like type +/// +/// Convenience for checking using the types' id +static inline bool is_primitive_like(const DataType& type) { + return is_primitive_like(type.id()); +} /// \brief Check for a binary or string-like type (except fixed-size binary) /// /// \param[in] type the type to check /// \return whether type is a binary or string-like type /// -/// Convenience for checking membership of a type in BaseBinaryTypes() -static inline bool IsBaseBinaryType(const DataType& type) { +/// Convenience for checking using the types' id +static inline bool is_base_binary_like(const DataType& type) { return is_base_binary_like(type.id()); } +/// \brief Check for a binary-like type +/// +/// \param[in] type the type to check +/// \return whether type is a binary-like type +/// +/// Convenience for checking using the types' id +static inline bool is_binary_like(const DataType& type) { + return is_binary_like(type.id()); +} + +/// \brief Check for a large-binary-like type +/// +/// \param[in] type the type to check +/// \return whether type is a large-binary-like type +/// +/// Convenience for checking using the types' id +static inline bool is_large_binary_like(const DataType& type) { + return is_large_binary_like(type.id()); +} + /// \brief Check for a binary type /// /// \param[in] type the type to check /// \return whether type is a binary type /// -/// Convenience for checking membership of a type in BinaryTypes() -static inline bool IsBinaryType(const DataType& type) { return is_binary(type.id()); } +/// Convenience for checking using the types' id +static inline bool is_binary(const DataType& type) { return is_binary(type.id()); } /// \brief Check for a string type /// /// \param[in] type the type to check /// \return whether type is a string type /// -/// Convenience for checking membership of a type in StringTypes() -static inline bool IsStringType(const DataType& type) { return is_string(type.id()); } +/// Convenience for checking using the types' id +static inline bool is_string(const DataType& type) { return is_string(type.id()); } /// \brief Check for a temporal type, including time and timestamps for each unit /// /// \param[in] type the type to check /// \return whether type is a temporal type /// -/// Convenience for checking membership of a type in TemporalTypes() -static inline bool IsTemporalType(const DataType& type) { return is_temporal(type.id()); } +/// Convenience for checking using the types' id +static inline bool is_temporal(const DataType& type) { return is_temporal(type.id()); } /// \brief Check for an interval type /// /// \param[in] type the type to check /// \return whether type is a interval type /// -/// Convenience for checking membership of a type in IntervalTypes() -static inline bool IsIntervalType(const DataType& type) { return is_interval(type.id()); } +/// Convenience for checking using the types' id +static inline bool is_interval(const DataType& type) { return is_interval(type.id()); } -/// \brief Check for a primitive type - numeric, base binary, date, boolean or null +/// \brief Check for a dictionary type /// /// \param[in] type the type to check -/// \return whether type is a primitive type +/// \return whether type is a dictionary type /// -/// Convenience for checking membership of a type in PrimitiveTypes() -static inline bool IsPrimitiveType(const DataType& type) { - return is_primitive_like(type.id()); +/// Convenience for checking using the types' id +static inline bool is_dictionary(const DataType& type) { + return is_dictionary(type.id()); +} + +/// \brief Check for a fixed-size-binary type +/// +/// \param[in] type the type to check +/// \return whether type is a fixed-size-binary type +/// +/// Convenience for checking using the types' id +static inline bool is_fixed_size_binary(const DataType& type) { + return is_fixed_size_binary(type.id()); +} + +/// \brief Check for a fixed-width type +/// +/// \param[in] type the type to check +/// \return whether type is a fixed-width type +/// +/// Convenience for checking using the types' id +static inline bool is_fixed_width(const DataType& type) { + return is_fixed_width(type.id()); +} + +/// \brief Check for a list-like type +/// +/// \param[in] type the type to check +/// \return whether type is a list-like type +/// +/// Convenience for checking using the types' id +static inline bool is_list_like(const DataType& type) { + return is_list_like(type.id()); } +/// \brief Check for a nested type +/// +/// \param[in] type the type to check +/// \return whether type is a nested type +/// +/// Convenience for checking using the types' id +static inline bool is_nested(const DataType& type) { return is_nested(type.id()); } + +/// \brief Check for a union type +/// +/// \param[in] type the type to check +/// \return whether type is a union type +/// +/// Convenience for checking using the types' id +static inline bool is_union(const DataType& type) { return is_union(type.id()); } + /// @} } // namespace arrow From 6669b7fa48c1dce4fdecc006bf7dfac6fee411b7 Mon Sep 17 00:00:00 2001 From: Yaron Gvili Date: Thu, 4 Aug 2022 10:39:28 -0400 Subject: [PATCH 05/12] lint --- cpp/src/arrow/type_traits.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index a38b885685c4c..4c056ddf2de6b 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -1318,9 +1318,7 @@ static inline bool is_unsigned_integer(const DataType& type) { /// \return whether type is a floating point type /// /// Convenience for checking using the types' id -static inline bool is_floating(const DataType& type) { - return is_floating(type.id()); -} +static inline bool is_floating(const DataType& type) { return is_floating(type.id()); } /// \brief Check for a numeric type (number except boolean type) /// @@ -1344,9 +1342,7 @@ static inline bool is_decimal(const DataType& type) { return is_decimal(type.id( /// \return whether type is a primitive type /// /// Convenience for checking using the types' id -static inline bool is_primitive(const DataType& type) { - return is_primitive(type.id()); -} +static inline bool is_primitive(const DataType& type) { return is_primitive(type.id()); } /// \brief Check for a primitive-like type /// @@ -1456,9 +1452,7 @@ static inline bool is_fixed_width(const DataType& type) { /// \return whether type is a list-like type /// /// Convenience for checking using the types' id -static inline bool is_list_like(const DataType& type) { - return is_list_like(type.id()); -} +static inline bool is_list_like(const DataType& type) { return is_list_like(type.id()); } /// \brief Check for a nested type /// From 99ddf32a2169cdaa39b35d1912e2b2284b0e40b5 Mon Sep 17 00:00:00 2001 From: Yaron Gvili Date: Sun, 7 Aug 2022 09:30:36 -0400 Subject: [PATCH 06/12] remove is_primitive_like --- cpp/src/arrow/type_test.cc | 1 - cpp/src/arrow/type_traits.h | 42 ------------------------------------- 2 files changed, 43 deletions(-) diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc index 3362183804d63..2d1a0078edadd 100644 --- a/cpp/src/arrow/type_test.cc +++ b/cpp/src/arrow/type_test.cc @@ -1847,7 +1847,6 @@ TEST(TypesTest, TestMembership) { TEST_PREDICATE(all_types, is_numeric); TEST_PREDICATE(all_types, is_decimal); TEST_PREDICATE(all_types, is_primitive); - TEST_PREDICATE(all_types, is_primitive_like); TEST_PREDICATE(all_types, is_base_binary_like); TEST_PREDICATE(all_types, is_binary_like); TEST_PREDICATE(all_types, is_large_binary_like); diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 4c056ddf2de6b..521b473afb51a 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -991,38 +991,6 @@ static inline bool is_primitive(Type::type type_id) { return false; } -/// \brief Check for a primitive-like type -/// -/// \param[in] type_id the type-id to check -/// \return whether type-id is a primitive-like type one -static inline bool is_primitive_like(Type::type type_id) { - switch (type_id) { - case Type::NA: - case Type::BOOL: - case Type::UINT8: - case Type::INT8: - case Type::UINT16: - case Type::INT16: - case Type::UINT32: - case Type::INT32: - case Type::UINT64: - case Type::INT64: - case Type::HALF_FLOAT: - case Type::FLOAT: - case Type::DOUBLE: - case Type::DATE32: - case Type::DATE64: - case Type::BINARY: - case Type::STRING: - case Type::LARGE_BINARY: - case Type::LARGE_STRING: - return true; - default: - break; - } - return false; -} - /// \brief Check for a base-binary-like type /// /// \param[in] type_id the type-id to check @@ -1344,16 +1312,6 @@ static inline bool is_decimal(const DataType& type) { return is_decimal(type.id( /// Convenience for checking using the types' id static inline bool is_primitive(const DataType& type) { return is_primitive(type.id()); } -/// \brief Check for a primitive-like type -/// -/// \param[in] type the type to check -/// \return whether type is a primitive-like type -/// -/// Convenience for checking using the types' id -static inline bool is_primitive_like(const DataType& type) { - return is_primitive_like(type.id()); -} - /// \brief Check for a binary or string-like type (except fixed-size binary) /// /// \param[in] type the type to check From f01a21598b12c611905a5d022ca1c8a3481e7e34 Mon Sep 17 00:00:00 2001 From: rtpsw Date: Tue, 9 Aug 2022 23:54:59 +0300 Subject: [PATCH 07/12] Update cpp/src/arrow/type_traits.h Co-authored-by: Antoine Pitrou --- cpp/src/arrow/type_traits.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 521b473afb51a..ca538b865344e 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -920,6 +920,7 @@ static inline bool is_floating(Type::type type_id) { /// \brief Check for a numeric type /// +/// This predicate doesn't match decimals (see `is_decimal`). /// \param[in] type_id the type-id to check /// \return whether type-id is a numeric type one static inline bool is_numeric(Type::type type_id) { From 353b7ab526ffb13bd233688595239683b6915162 Mon Sep 17 00:00:00 2001 From: rtpsw Date: Tue, 9 Aug 2022 23:55:36 +0300 Subject: [PATCH 08/12] Update cpp/src/arrow/type_traits.h Co-authored-by: Antoine Pitrou --- cpp/src/arrow/type_traits.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index ca538b865344e..f7dbadd9ba730 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -960,6 +960,7 @@ static inline bool is_decimal(Type::type type_id) { /// \brief Check for a primitive type /// +/// This predicate doesn't match null, decimals and binary-like types. /// \param[in] type_id the type-id to check /// \return whether type-id is a primitive type one static inline bool is_primitive(Type::type type_id) { From 15dcecb7e69b2054f8acd03ee1c8f170435bfb73 Mon Sep 17 00:00:00 2001 From: rtpsw Date: Tue, 9 Aug 2022 23:56:01 +0300 Subject: [PATCH 09/12] Update cpp/src/arrow/type_traits.h Co-authored-by: Antoine Pitrou --- cpp/src/arrow/type_traits.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index f7dbadd9ba730..bada2015e7468 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -995,6 +995,7 @@ static inline bool is_primitive(Type::type type_id) { /// \brief Check for a base-binary-like type /// +/// This predicate doesn't match fixed-size binary types. /// \param[in] type_id the type-id to check /// \return whether type-id is a base-binary-like type one static inline bool is_base_binary_like(Type::type type_id) { From 2d79b3689909adc8b0ab650e9f1e1341042efd2e Mon Sep 17 00:00:00 2001 From: rtpsw Date: Tue, 9 Aug 2022 23:56:23 +0300 Subject: [PATCH 10/12] Update cpp/src/arrow/type_traits.h Co-authored-by: Antoine Pitrou --- cpp/src/arrow/type_traits.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index bada2015e7468..cdc642016db25 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -1115,6 +1115,7 @@ static inline bool is_dictionary(Type::type type_id) { /// \brief Check for a fixed-size-binary type /// +/// This predicate also matches decimals. /// \param[in] type_id the type-id to check /// \return whether type-id is a fixed-size-binary type one static inline bool is_fixed_size_binary(Type::type type_id) { From 21fc1c47331ce260ead04039d4a633fe4e2dfe8d Mon Sep 17 00:00:00 2001 From: rtpsw Date: Tue, 9 Aug 2022 23:56:42 +0300 Subject: [PATCH 11/12] Update cpp/src/arrow/type_traits.h Co-authored-by: Antoine Pitrou --- cpp/src/arrow/type_traits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index cdc642016db25..20cd93a11920a 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -1261,7 +1261,7 @@ static inline int offset_bit_width(Type::type type_id) { /// \param[in] type the type to check /// \return whether type is an integer type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_integer(const DataType& type) { return is_integer(type.id()); } /// \brief Check for a signed integer type From 3c173b5dbfeed667407239c23c8cfd315d3c560e Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 17 Aug 2022 15:51:28 +0200 Subject: [PATCH 12/12] Minor improvements to docstrings --- cpp/src/arrow/type_traits.h | 157 ++++++++++++++++++++---------------- 1 file changed, 86 insertions(+), 71 deletions(-) diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 20cd93a11920a..66da3cadcb592 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -921,6 +921,7 @@ static inline bool is_floating(Type::type type_id) { /// \brief Check for a numeric type /// /// This predicate doesn't match decimals (see `is_decimal`). +/// /// \param[in] type_id the type-id to check /// \return whether type-id is a numeric type one static inline bool is_numeric(Type::type type_id) { @@ -961,6 +962,7 @@ static inline bool is_decimal(Type::type type_id) { /// \brief Check for a primitive type /// /// This predicate doesn't match null, decimals and binary-like types. +/// /// \param[in] type_id the type-id to check /// \return whether type-id is a primitive type one static inline bool is_primitive(Type::type type_id) { @@ -995,7 +997,9 @@ static inline bool is_primitive(Type::type type_id) { /// \brief Check for a base-binary-like type /// -/// This predicate doesn't match fixed-size binary types. +/// This predicate doesn't match fixed-size binary types and will otherwise +/// match all binary- and string-like types regardless of offset width. +/// /// \param[in] type_id the type-id to check /// \return whether type-id is a base-binary-like type one static inline bool is_base_binary_like(Type::type type_id) { @@ -1011,7 +1015,7 @@ static inline bool is_base_binary_like(Type::type type_id) { return false; } -/// \brief Check for a binary-like type +/// \brief Check for a binary-like type (i.e. with 32-bit offsets) /// /// \param[in] type_id the type-id to check /// \return whether type-id is a binary-like type one @@ -1026,7 +1030,7 @@ static inline bool is_binary_like(Type::type type_id) { return false; } -/// \brief Check for a large-binary-like type +/// \brief Check for a large-binary-like type (i.e. with 64-bit offsets) /// /// \param[in] type_id the type-id to check /// \return whether type-id is a large-binary-like type one @@ -1041,7 +1045,7 @@ static inline bool is_large_binary_like(Type::type type_id) { return false; } -/// \brief Check for a binary type +/// \brief Check for a binary (non-string) type /// /// \param[in] type_id the type-id to check /// \return whether type-id is a binary type one @@ -1138,54 +1142,6 @@ static inline bool is_fixed_width(Type::type type_id) { return is_primitive(type_id) || is_dictionary(type_id) || is_fixed_size_binary(type_id); } -static inline int bit_width(Type::type type_id) { - switch (type_id) { - case Type::BOOL: - return 1; - case Type::UINT8: - case Type::INT8: - return 8; - case Type::UINT16: - case Type::INT16: - return 16; - case Type::UINT32: - case Type::INT32: - case Type::DATE32: - case Type::TIME32: - return 32; - case Type::UINT64: - case Type::INT64: - case Type::DATE64: - case Type::TIME64: - case Type::TIMESTAMP: - case Type::DURATION: - return 64; - - case Type::HALF_FLOAT: - return 16; - case Type::FLOAT: - return 32; - case Type::DOUBLE: - return 64; - - case Type::INTERVAL_MONTHS: - return 32; - case Type::INTERVAL_DAY_TIME: - return 64; - case Type::INTERVAL_MONTH_DAY_NANO: - return 128; - - case Type::DECIMAL128: - return 128; - case Type::DECIMAL256: - return 256; - - default: - break; - } - return 0; -} - /// \brief Check for a list-like type /// /// \param[in] type_id the type-id to check @@ -1238,6 +1194,65 @@ static inline bool is_union(Type::type type_id) { return false; } +/// \brief Return the values bit width of a type +/// +/// \param[in] type_id the type-id to check +/// \return the values bit width, or 0 if the type does not have fixed-width values +/// +/// For Type::FIXED_SIZE_BINARY, you will instead need to inspect the concrete +/// DataType to get this information. +static inline int bit_width(Type::type type_id) { + switch (type_id) { + case Type::BOOL: + return 1; + case Type::UINT8: + case Type::INT8: + return 8; + case Type::UINT16: + case Type::INT16: + return 16; + case Type::UINT32: + case Type::INT32: + case Type::DATE32: + case Type::TIME32: + return 32; + case Type::UINT64: + case Type::INT64: + case Type::DATE64: + case Type::TIME64: + case Type::TIMESTAMP: + case Type::DURATION: + return 64; + + case Type::HALF_FLOAT: + return 16; + case Type::FLOAT: + return 32; + case Type::DOUBLE: + return 64; + + case Type::INTERVAL_MONTHS: + return 32; + case Type::INTERVAL_DAY_TIME: + return 64; + case Type::INTERVAL_MONTH_DAY_NANO: + return 128; + + case Type::DECIMAL128: + return 128; + case Type::DECIMAL256: + return 256; + + default: + break; + } + return 0; +} + +/// \brief Return the offsets bit width of a type +/// +/// \param[in] type_id the type-id to check +/// \return the offsets bit width, or 0 if the type does not have offsets static inline int offset_bit_width(Type::type type_id) { switch (type_id) { case Type::STRING: @@ -1269,7 +1284,7 @@ static inline bool is_integer(const DataType& type) { return is_integer(type.id( /// \param[in] type the type to check /// \return whether type is a signed integer type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_signed_integer(const DataType& type) { return is_signed_integer(type.id()); } @@ -1279,7 +1294,7 @@ static inline bool is_signed_integer(const DataType& type) { /// \param[in] type the type to check /// \return whether type is an unsigned integer type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_unsigned_integer(const DataType& type) { return is_unsigned_integer(type.id()); } @@ -1289,7 +1304,7 @@ static inline bool is_unsigned_integer(const DataType& type) { /// \param[in] type the type to check /// \return whether type is a floating point type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_floating(const DataType& type) { return is_floating(type.id()); } /// \brief Check for a numeric type (number except boolean type) @@ -1297,7 +1312,7 @@ static inline bool is_floating(const DataType& type) { return is_floating(type.i /// \param[in] type the type to check /// \return whether type is a numeric type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_numeric(const DataType& type) { return is_numeric(type.id()); } /// \brief Check for a decimal type @@ -1305,7 +1320,7 @@ static inline bool is_numeric(const DataType& type) { return is_numeric(type.id( /// \param[in] type the type to check /// \return whether type is a decimal type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_decimal(const DataType& type) { return is_decimal(type.id()); } /// \brief Check for a primitive type @@ -1313,7 +1328,7 @@ static inline bool is_decimal(const DataType& type) { return is_decimal(type.id( /// \param[in] type the type to check /// \return whether type is a primitive type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_primitive(const DataType& type) { return is_primitive(type.id()); } /// \brief Check for a binary or string-like type (except fixed-size binary) @@ -1321,7 +1336,7 @@ static inline bool is_primitive(const DataType& type) { return is_primitive(type /// \param[in] type the type to check /// \return whether type is a binary or string-like type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_base_binary_like(const DataType& type) { return is_base_binary_like(type.id()); } @@ -1331,7 +1346,7 @@ static inline bool is_base_binary_like(const DataType& type) { /// \param[in] type the type to check /// \return whether type is a binary-like type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_binary_like(const DataType& type) { return is_binary_like(type.id()); } @@ -1341,7 +1356,7 @@ static inline bool is_binary_like(const DataType& type) { /// \param[in] type the type to check /// \return whether type is a large-binary-like type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_large_binary_like(const DataType& type) { return is_large_binary_like(type.id()); } @@ -1351,7 +1366,7 @@ static inline bool is_large_binary_like(const DataType& type) { /// \param[in] type the type to check /// \return whether type is a binary type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_binary(const DataType& type) { return is_binary(type.id()); } /// \brief Check for a string type @@ -1359,7 +1374,7 @@ static inline bool is_binary(const DataType& type) { return is_binary(type.id()) /// \param[in] type the type to check /// \return whether type is a string type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_string(const DataType& type) { return is_string(type.id()); } /// \brief Check for a temporal type, including time and timestamps for each unit @@ -1367,7 +1382,7 @@ static inline bool is_string(const DataType& type) { return is_string(type.id()) /// \param[in] type the type to check /// \return whether type is a temporal type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_temporal(const DataType& type) { return is_temporal(type.id()); } /// \brief Check for an interval type @@ -1375,7 +1390,7 @@ static inline bool is_temporal(const DataType& type) { return is_temporal(type.i /// \param[in] type the type to check /// \return whether type is a interval type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_interval(const DataType& type) { return is_interval(type.id()); } /// \brief Check for a dictionary type @@ -1383,7 +1398,7 @@ static inline bool is_interval(const DataType& type) { return is_interval(type.i /// \param[in] type the type to check /// \return whether type is a dictionary type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_dictionary(const DataType& type) { return is_dictionary(type.id()); } @@ -1393,7 +1408,7 @@ static inline bool is_dictionary(const DataType& type) { /// \param[in] type the type to check /// \return whether type is a fixed-size-binary type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_fixed_size_binary(const DataType& type) { return is_fixed_size_binary(type.id()); } @@ -1403,7 +1418,7 @@ static inline bool is_fixed_size_binary(const DataType& type) { /// \param[in] type the type to check /// \return whether type is a fixed-width type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_fixed_width(const DataType& type) { return is_fixed_width(type.id()); } @@ -1413,7 +1428,7 @@ static inline bool is_fixed_width(const DataType& type) { /// \param[in] type the type to check /// \return whether type is a list-like type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_list_like(const DataType& type) { return is_list_like(type.id()); } /// \brief Check for a nested type @@ -1421,7 +1436,7 @@ static inline bool is_list_like(const DataType& type) { return is_list_like(type /// \param[in] type the type to check /// \return whether type is a nested type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_nested(const DataType& type) { return is_nested(type.id()); } /// \brief Check for a union type @@ -1429,7 +1444,7 @@ static inline bool is_nested(const DataType& type) { return is_nested(type.id()) /// \param[in] type the type to check /// \return whether type is a union type /// -/// Convenience for checking using the types' id +/// Convenience for checking using the type's id static inline bool is_union(const DataType& type) { return is_union(type.id()); } /// @}