Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decimal improvements #3047

Merged
merged 8 commits into from
Sep 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions dbms/src/Core/DecimalComparison.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ namespace ErrorCodes
}

///
inline bool allowDecimalComparison(const IDataType & left_type, const IDataType & right_type)
inline bool allowDecimalComparison(const IDataType * left_type, const IDataType * right_type)
{
if (isDecimal(left_type))
{
if (isDecimal(right_type) || notDecimalButComparableToDecimal(right_type))
if (isDecimal(right_type) || isNotDecimalButComparableToDecimal(right_type))
return true;
}
else if (notDecimalButComparableToDecimal(left_type) && isDecimal(right_type))
else if (isNotDecimalButComparableToDecimal(left_type) && isDecimal(right_type))
return true;
return false;
}
Expand Down
33 changes: 0 additions & 33 deletions dbms/src/DataTypes/DataTypesDecimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,20 +134,12 @@ class DataTypeDecimal final : public DataTypeSimpleSerialization
bool textCanContainOnlyValidUTF8() const override { return true; }
bool isComparable() const override { return true; }
bool isValueRepresentedByNumber() const override { return true; }
bool isValueRepresentedByInteger() const override { return true; }
bool isValueRepresentedByUnsignedInteger() const override { return false; }
bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
bool haveMaximumSizeOfValue() const override { return true; }
size_t getSizeOfValueInMemory() const override { return sizeof(T); }
bool isCategorial() const override { return isValueRepresentedByInteger(); }

bool canBeUsedAsVersion() const override { return false; }
bool isSummable() const override { return true; }
bool canBeUsedInBitOperations() const override { return false; }
bool isUnsignedInteger() const override { return false; }
bool canBeUsedInBooleanContext() const override { return true; }
bool isNumber() const override { return true; }
bool isInteger() const override { return false; }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Too much cases where isNumber() means something more then expected.
In example, when isNumber enabled it leads to compareAt() for Decimals with different scales in some cases.

bool canBeInsideNullable() const override { return true; }

/// Decimal specific
Expand Down Expand Up @@ -255,17 +247,6 @@ inline const DataTypeDecimal<T> * checkDecimal(const IDataType & data_type)
return typeid_cast<const DataTypeDecimal<T> *>(&data_type);
}

inline bool isDecimal(const IDataType & data_type)
{
if (typeid_cast<const DataTypeDecimal<Decimal32> *>(&data_type))
return true;
if (typeid_cast<const DataTypeDecimal<Decimal64> *>(&data_type))
return true;
if (typeid_cast<const DataTypeDecimal<Decimal128> *>(&data_type))
return true;
return false;
}

inline UInt32 getDecimalScale(const IDataType & data_type)
{
if (auto * decimal_type = checkDecimal<Decimal32>(data_type))
Expand All @@ -278,20 +259,6 @@ inline UInt32 getDecimalScale(const IDataType & data_type)
}

///
inline bool notDecimalButComparableToDecimal(const IDataType & data_type)
{
if (data_type.isInteger())
return true;
return false;
}

///
inline bool comparableToDecimal(const IDataType & data_type)
{
if (data_type.isInteger())
return true;
return isDecimal(data_type);
}

template <typename DataType> constexpr bool IsDecimal = false;
template <> constexpr bool IsDecimal<DataTypeDecimal<Decimal32>> = true;
Expand Down
72 changes: 72 additions & 0 deletions dbms/src/DataTypes/IDataType.h
Original file line number Diff line number Diff line change
Expand Up @@ -423,5 +423,77 @@ class IDataType : private boost::noncopyable
};


struct DataTypeExtractor
{
TypeIndex idx;

DataTypeExtractor(const IDataType * data_type)
: idx(data_type->getTypeId())
{}

bool isUInt8() const { return idx == TypeIndex::UInt8; }
bool isUInt16() const { return idx == TypeIndex::UInt16; }
bool isUInt32() const { return idx == TypeIndex::UInt32; }
bool isUInt64() const { return idx == TypeIndex::UInt64; }
bool isUInt128() const { return idx == TypeIndex::UInt128; }
bool isUInt() const { return isUInt8() || isUInt16() || isUInt32() || isUInt64() || isUInt128(); }

bool isInt8() const { return idx == TypeIndex::Int8; }
bool isInt16() const { return idx == TypeIndex::Int16; }
bool isInt32() const { return idx == TypeIndex::Int32; }
bool isInt64() const { return idx == TypeIndex::Int64; }
bool isInt128() const { return idx == TypeIndex::Int128; }
bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128(); }

bool isDecimal32() const { return idx == TypeIndex::Decimal32; }
bool isDecimal64() const { return idx == TypeIndex::Decimal64; }
bool isDecimal128() const { return idx == TypeIndex::Decimal128; }
bool isDecimal() const { return isDecimal32() || isDecimal64() || isDecimal128(); }

bool isFloat32() const { return idx == TypeIndex::Float32; }
bool isFloat64() const { return idx == TypeIndex::Float64; }
bool isFloat() const { return isFloat32() || isFloat64(); }

bool isEnum8() const { return idx == TypeIndex::Enum8; }
bool isEnum16() const { return idx == TypeIndex::Enum16; }
bool isEnum() const { return isEnum8() || isEnum16(); }

bool isDate() const { return idx == TypeIndex::Date; }
bool isDateTime() const { return idx == TypeIndex::DateTime; }
bool isDateOrDateTime() const { return isDate() || isDateTime(); }

bool isString() const { return idx == TypeIndex::String; }
bool isFixedString() const { return idx == TypeIndex::FixedString; }
bool isStringOrFixedString() const { return isString() || isFixedString(); }

bool isUUID() const { return idx == TypeIndex::UUID; }
bool isArray() const { return idx == TypeIndex::Array; }
bool isTuple() const { return idx == TypeIndex::Tuple; }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now you have to replace typeid_casts, checkDataType across all the codebase.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

};

/// IDataType helpers (alternative for IDataType virtual methods)

inline bool isEnum(const IDataType * data_type)
{
return DataTypeExtractor(data_type).isEnum();
}

inline bool isDecimal(const IDataType * data_type)
{
return DataTypeExtractor(data_type).isDecimal();
}

inline bool isNotDecimalButComparableToDecimal(const IDataType * data_type)
{
DataTypeExtractor which(data_type);
return which.isInt() || which.isUInt();
}

inline bool isCompilableType(const IDataType * data_type)
{
return data_type->isValueRepresentedByNumber() && !isDecimal(data_type);
}


}

66 changes: 44 additions & 22 deletions dbms/src/DataTypes/getLeastSupertype.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#include <unordered_set>

#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Common/typeid_cast.h>
Expand All @@ -11,6 +13,7 @@
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>


namespace DB
Expand Down Expand Up @@ -185,22 +188,19 @@ DataTypePtr getLeastSupertype(const DataTypes & types)

/// Non-recursive rules

std::unordered_set<TypeIndex> type_ids;
for (const auto & type : types)
type_ids.insert(type->getTypeId());

/// For String and FixedString, or for different FixedStrings, the common type is String.
/// No other types are compatible with Strings. TODO Enums?
{
bool have_string = false;
bool all_strings = true;
UInt32 have_string = type_ids.count(TypeIndex::String);
UInt32 have_fixed_string = type_ids.count(TypeIndex::FixedString);

for (const auto & type : types)
{
if (type->isStringOrFixedString())
have_string = true;
else
all_strings = false;
}

if (have_string)
if (have_string || have_fixed_string)
{
bool all_strings = type_ids.size() == (have_string + have_fixed_string);
if (!all_strings)
throw Exception(getExceptionMessagePrefix(types) + " because some of them are String/FixedString and some of them are not", ErrorCodes::NO_COMMON_TYPE);

Expand All @@ -210,26 +210,48 @@ DataTypePtr getLeastSupertype(const DataTypes & types)

/// For Date and DateTime, the common type is DateTime. No other types are compatible.
{
bool have_date_or_datetime = false;
bool all_date_or_datetime = true;

for (const auto & type : types)
{
if (type->isDateOrDateTime())
have_date_or_datetime = true;
else
all_date_or_datetime = false;
}
UInt32 have_date = type_ids.count(TypeIndex::Date);
UInt32 have_datetime = type_ids.count(TypeIndex::DateTime);

if (have_date_or_datetime)
if (have_date || have_datetime)
{
bool all_date_or_datetime = type_ids.size() == (have_date + have_datetime);
if (!all_date_or_datetime)
throw Exception(getExceptionMessagePrefix(types) + " because some of them are Date/DateTime and some of them are not", ErrorCodes::NO_COMMON_TYPE);

return std::make_shared<DataTypeDateTime>();
}
}

/// Decimals
{
UInt32 have_decimal32 = type_ids.count(TypeIndex::Decimal32);
UInt32 have_decimal64 = type_ids.count(TypeIndex::Decimal64);
UInt32 have_decimal128 = type_ids.count(TypeIndex::Decimal128);

if (have_decimal32 || have_decimal64 || have_decimal128)
{
bool all_are_decimals = type_ids.size() == (have_decimal32 + have_decimal64 + have_decimal128);
if (!all_are_decimals)
throw Exception(getExceptionMessagePrefix(types) + " because some of them are Decimals and some are not",
ErrorCodes::NO_COMMON_TYPE);

UInt32 max_scale = 0;
for (const auto & type : types)
{
UInt32 scale = getDecimalScale(*type);
if (scale > max_scale)
max_scale = scale;
}

if (have_decimal128)
return std::make_shared<DataTypeDecimal<Decimal128>>(DataTypeDecimal<Decimal128>::maxPrecision(), max_scale);
if (have_decimal64)
return std::make_shared<DataTypeDecimal<Decimal64>>(DataTypeDecimal<Decimal64>::maxPrecision(), max_scale);
return std::make_shared<DataTypeDecimal<Decimal32>>(DataTypeDecimal<Decimal32>::maxPrecision(), max_scale);
}
}

/// For numeric types, the most complicated part.
{
bool all_numbers = true;
Expand Down
12 changes: 9 additions & 3 deletions dbms/src/Functions/FunctionsArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,14 @@ struct ArrayIndexGenericNullImpl
}
};


inline bool allowArrayIndex(const IDataType * data_type0, const IDataType * data_type1)
{
return ((data_type0->isNumber() || isEnum(data_type0)) && data_type1->isNumber())
|| data_type0->equals(*data_type1);
}


template <typename IndexConv, typename Name>
class FunctionArrayIndex : public IFunction
{
Expand Down Expand Up @@ -1010,9 +1018,7 @@ class FunctionArrayIndex : public IFunction
DataTypePtr observed_type0 = removeNullable(array_type->getNestedType());
DataTypePtr observed_type1 = removeNullable(arguments[1]);

/// We also support arrays of Enum type (that are represented by number) to search numeric values.
if (!(observed_type0->isValueRepresentedByNumber() && observed_type1->isNumber())
&& !observed_type0->equals(*observed_type1))
if (!allowArrayIndex(observed_type0.get(), observed_type1.get()))
throw Exception("Types of array and 2nd argument of function "
+ getName() + " must be identical up to nullability or numeric types or Enum and numeric type. Passed: "
+ arguments[0]->getName() + " and " + arguments[1]->getName() + ".",
Expand Down
Loading