Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Additional higher order functions to work with arrays #2942

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ci/default-config
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ PROJECT_ROOT=$(cd $SCRIPTPATH/.. && pwd)

# get-sources
SOURCES_METHOD=local # clone, local, tarball
SOURCES_CLONE_URL="https://github.com/yandex/ClickHouse.git"
SOURCES_BRANCH="master"
SOURCES_CLONE_URL="https://github.com/astudnev/ClickHouse-1.git"
SOURCES_BRANCH="cum_sum_limit2"
SOURCES_COMMIT=HEAD # do checkout of this commit after clone

# prepare-toolchain
Expand Down
2 changes: 2 additions & 0 deletions dbms/src/Functions/FunctionsHigherOrder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ void registerFunctionsHigherOrder(FunctionFactory & factory)
factory.registerFunction<FunctionArraySort>();
factory.registerFunction<FunctionArrayReverseSort>();
factory.registerFunction<FunctionArrayCumSum>();
factory.registerFunction<FunctionArrayCumSumLimited>();
factory.registerFunction<FunctionArrayDifference>();
}

}
232 changes: 232 additions & 0 deletions dbms/src/Functions/FunctionsHigherOrder.h
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,234 @@ struct ArrayCumSumImpl

};

/** Implementation of arrayDifference.
  *
  * For every array of the input column produces an array of the same length:
  * the first element is 0 and each following element is value[k] - value[k - 1].
  *
  * The result element type is signed (Int64) for all integer inputs and Float64 for
  * floating-point inputs. A difference of two unsigned values may be negative, so an
  * unsigned result type would silently wrap around (e.g. [3, 1] would give 2^64 - 2).
  */
struct ArrayDifferenceImpl
{
    static bool needBoolean() { return false; }
    static bool needExpression() { return false; }
    static bool needOneArray() { return false; }

    /// Returns Array(Int64) for any integer element type and Array(Float64) for Float32/Float64.
    /// Throws ILLEGAL_TYPE_OF_ARGUMENT for every other element type.
    static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & /*array_element*/)
    {
        const auto * type = expression_return.get();

        /// Unsigned inputs also map to a signed result: differences can be negative.
        if (checkDataType<DataTypeUInt8>(type) ||
            checkDataType<DataTypeUInt16>(type) ||
            checkDataType<DataTypeUInt32>(type) ||
            checkDataType<DataTypeUInt64>(type) ||
            checkDataType<DataTypeInt8>(type) ||
            checkDataType<DataTypeInt16>(type) ||
            checkDataType<DataTypeInt32>(type) ||
            checkDataType<DataTypeInt64>(type))
            return std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt64>());

        if (checkDataType<DataTypeFloat32>(type) ||
            checkDataType<DataTypeFloat64>(type))
            return std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat64>());

        throw Exception("arrayDifference cannot process values of type " + expression_return->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }


    /** Tries to process `mapped` as a column of `Element` values (full or constant).
      *
      * Returns false, leaving `res_ptr` untouched, when `mapped` is neither
      * ColumnVector<Element> nor a constant of that type, so the caller can try the next type.
      * On success stores an array column into `res_ptr` that reuses the offsets of `array`.
      */
    template <typename Element, typename Result>
    static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr)
    {
        const ColumnVector<Element> * column = checkAndGetColumn<ColumnVector<Element>>(&*mapped);

        if (!column)
        {
            const ColumnConst * column_const = checkAndGetColumnConst<ColumnVector<Element>>(&*mapped);

            if (!column_const)
                return false;

            auto res_nested = ColumnVector<Result>::create();
            typename ColumnVector<Result>::Container & res_values = res_nested->getData();
            res_values.resize(column_const->size());

            /// Every element equals the same constant, so both the leading element
            /// and all differences are zero.
            for (size_t pos = 0; pos < res_values.size(); ++pos)
                res_values[pos] = 0;

            res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
            return true;
        }

        const IColumn::Offsets & offsets = array.getOffsets();
        const typename ColumnVector<Element>::Container & data = column->getData();

        auto res_nested = ColumnVector<Result>::create();
        typename ColumnVector<Result>::Container & res_values = res_nested->getData();
        res_values.resize(data.size());

        size_t pos = 0;
        for (size_t i = 0; i < offsets.size(); ++i)
        {
            /// Empty arrays contribute no elements.
            if (pos >= offsets[i])
                continue;

            /// The first element of an array has no predecessor.
            res_values[pos] = 0;

            /// Widen both operands to Result before subtracting, so the subtraction itself is
            /// done in the signed / double-precision result type rather than in Element.
            for (++pos; pos < offsets[i]; ++pos)
                res_values[pos] = static_cast<Result>(data[pos]) - static_cast<Result>(data[pos - 1]);
        }

        res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
        return true;
    }

    /// Dispatches over the supported element types; throws if `mapped` matches none of them.
    static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
    {
        ColumnPtr res;

        if (executeType< UInt8 , Int64>(mapped, array, res) ||
            executeType< UInt16, Int64>(mapped, array, res) ||
            executeType< UInt32, Int64>(mapped, array, res) ||
            executeType< UInt64, Int64>(mapped, array, res) ||
            executeType< Int8 , Int64>(mapped, array, res) ||
            executeType< Int16, Int64>(mapped, array, res) ||
            executeType< Int32, Int64>(mapped, array, res) ||
            executeType< Int64, Int64>(mapped, array, res) ||
            executeType<Float32,Float64>(mapped, array, res) ||
            executeType<Float64,Float64>(mapped, array, res))
            return res;
        else
            throw Exception("Unexpected column for arrayDifference: " + mapped->getName());
    }

};


struct ArrayCumSumLimitedImpl
{
static bool needBoolean() { return false; }
static bool needExpression() { return false; }
static bool needOneArray() { return false; }

static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & /*array_element*/)
{
if (checkDataType<DataTypeUInt8>(&*expression_return) ||
checkDataType<DataTypeUInt16>(&*expression_return) ||
checkDataType<DataTypeUInt32>(&*expression_return) ||
checkDataType<DataTypeUInt64>(&*expression_return))
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());

if (checkDataType<DataTypeInt8>(&*expression_return) ||
checkDataType<DataTypeInt16>(&*expression_return) ||
checkDataType<DataTypeInt32>(&*expression_return) ||
checkDataType<DataTypeInt64>(&*expression_return))
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt64>());

if (checkDataType<DataTypeFloat32>(&*expression_return) ||
checkDataType<DataTypeFloat64>(&*expression_return))
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat64>());

throw Exception("arrayCumSumLimited cannot add values of type " + expression_return->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}


template <typename Element, typename Result>
static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr)
{
const ColumnVector<Element> * column = checkAndGetColumn<ColumnVector<Element>>(&*mapped);

if (!column)
{
const ColumnConst * column_const = checkAndGetColumnConst<ColumnVector<Element>>(&*mapped);

if (!column_const)
return false;

const Element x = column_const->template getValue<Element>();
const IColumn::Offsets & offsets = array.getOffsets();

auto res_nested = ColumnVector<Result>::create();
typename ColumnVector<Result>::Container & res_values = res_nested->getData();
res_values.resize(column_const->size());

size_t pos = 0;
for (size_t i = 0; i < offsets.size(); ++i)
{
// skip empty arrays
if (pos < offsets[i])
{
res_values[pos++] = x;
for (; pos < offsets[i]; ++pos)
{
res_values[pos] = res_values[pos - 1] + x;
if(res_values[pos] < 0){
res_values[pos] = 0;
}
}
}
}

res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
return true;
}

const IColumn::Offsets & offsets = array.getOffsets();
const typename ColumnVector<Element>::Container & data = column->getData();

auto res_nested = ColumnVector<Result>::create();
typename ColumnVector<Result>::Container & res_values = res_nested->getData();
res_values.resize(data.size());

size_t pos = 0;
for (size_t i = 0; i < offsets.size(); ++i)
{
// skip empty arrays
if (pos < offsets[i])
{
res_values[pos] = data[pos];
for (++pos; pos < offsets[i]; ++pos)
{
res_values[pos] = res_values[pos - 1] + data[pos];
if(res_values[pos] < 0){
res_values[pos] = 0;
}
}
}
}
res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
return true;

}

static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
{
ColumnPtr res;

if (executeType< UInt8 , UInt64>(mapped, array, res) ||
executeType< UInt16, UInt64>(mapped, array, res) ||
executeType< UInt32, UInt64>(mapped, array, res) ||
executeType< UInt64, UInt64>(mapped, array, res) ||
executeType< Int8 , Int64>(mapped, array, res) ||
executeType< Int16, Int64>(mapped, array, res) ||
executeType< Int32, Int64>(mapped, array, res) ||
executeType< Int64, Int64>(mapped, array, res) ||
executeType<Float32,Float64>(mapped, array, res) ||
executeType<Float64,Float64>(mapped, array, res))
return res;
else
throw Exception("Unexpected column for arrayCumSumLimited: " + mapped->getName());
}

};


template <typename Impl, typename Name>
class FunctionArrayMapped : public IFunction
Expand Down Expand Up @@ -958,6 +1186,8 @@ struct NameArrayFirstIndex { static constexpr auto name = "arrayFirstIndex"; };
/// Name tags: each struct exposes one function name as a compile-time string constant `name`.
/// They are used as the second template argument of FunctionArrayMapped in the aliases below.
struct NameArraySort { static constexpr auto name = "arraySort"; };
struct NameArrayReverseSort { static constexpr auto name = "arrayReverseSort"; };
struct NameArrayCumSum { static constexpr auto name = "arrayCumSum"; };
struct NameArrayCumSumLimited { static constexpr auto name = "arrayCumSumLimited"; };
struct NameArrayDifference { static constexpr auto name = "arrayDifference"; };

/// Concrete function types: FunctionArrayMapped instantiated with an implementation struct and its name tag.
using FunctionArrayMap = FunctionArrayMapped<ArrayMapImpl, NameArrayMap>;
using FunctionArrayFilter = FunctionArrayMapped<ArrayFilterImpl, NameArrayFilter>;
Expand All @@ -970,5 +1200,7 @@ using FunctionArrayFirstIndex = FunctionArrayMapped<ArrayFirstIndexImpl, NameArr
/// Each alias binds an implementation struct (first argument) to its name tag (second argument).
/// ArraySortImpl is shared by both sort functions and selected by its boolean template argument.
using FunctionArraySort = FunctionArrayMapped<ArraySortImpl<true>, NameArraySort>;
using FunctionArrayReverseSort = FunctionArrayMapped<ArraySortImpl<false>, NameArrayReverseSort>;
using FunctionArrayCumSum = FunctionArrayMapped<ArrayCumSumImpl, NameArrayCumSum>;
using FunctionArrayCumSumLimited = FunctionArrayMapped<ArrayCumSumLimitedImpl, NameArrayCumSumLimited>;
using FunctionArrayDifference = FunctionArrayMapped<ArrayDifferenceImpl, NameArrayDifference>;

}