Skip to content

Commit

Permalink
Implementing review feedback. apache#4
Browse files Browse the repository at this point in the history
  • Loading branch information
rok committed Jun 8, 2021
1 parent 1a16757 commit 8dfe6c2
Show file tree
Hide file tree
Showing 4 changed files with 174 additions and 130 deletions.
51 changes: 27 additions & 24 deletions cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ Result<Datum> IfElse(const Datum& cond, const Datum& left, const Datum& right,
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR);
Expand All @@ -539,7 +539,7 @@ Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR);
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR);
Expand All @@ -550,7 +550,7 @@ Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR);
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
Expand All @@ -562,7 +562,7 @@ Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR);

Expand All @@ -573,7 +573,7 @@ ARROW_EXPORT Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NUL
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR);

Expand All @@ -584,31 +584,31 @@ ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NUL
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> ISOYear(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief ISOWeek returns ISO week of year number for each element of `values`.
/// First ISO week has the majority (4 or more) of it's days in January.
/// First ISO week has the majority (4 or more) of its days in January.
/// Week of the year starts with 1 and can run up to 53.
///
/// \param[in] values input to extract ISO week of year from
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> ISOWeek(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief ISOCalendar returns a (ISO year, ISO week, weekday) struct for each element of
/// `values`
/// \brief ISOCalendar returns a (ISO year, ISO week, Day of week) struct for each element
/// of `values`
///
/// \param[in] values input to extract ISO calendar struct from
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> ISOCalendar(const Datum& values, ExecContext* ctx = NULLPTR);

Expand All @@ -619,7 +619,7 @@ ARROW_EXPORT Result<Datum> ISOCalendar(const Datum& values, ExecContext* ctx = N
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = NULLPTR);

Expand All @@ -629,7 +629,7 @@ ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = NULLP
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR);
Expand All @@ -640,7 +640,7 @@ Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR);
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR);
Expand All @@ -651,52 +651,55 @@ Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR);
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief Millisecond returns milliseconds value for each element of `values`
/// \brief Millisecond returns number of milliseconds since the last full second
/// for each element of `values`
///
/// \param[in] values input to extract milliseconds from
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief Microsecond returns microseconds value for each element of `values`
/// \brief Microsecond returns number of microseconds since the last full millisecond
/// for each element of `values`
///
/// \param[in] values input to extract microseconds from
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief Nanosecond returns nanoseconds value for each element of `values`
/// \brief Nanosecond returns number of nanoseconds since the last full microsecond
/// for each element of `values`
///
/// \param[in] values input to extract nanoseconds from
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief Subsecond returns subsecond time fraction since last second as a float for each element of
/// `values`
/// \brief Subsecond returns the fraction of a second elapsed since the last full second
/// as a float for each element of `values`
///
/// \param[in] values input to extract subsecond from
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 4.0.0
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> Subsecond(const Datum& values, ExecContext* ctx = NULLPTR);

Expand Down
56 changes: 33 additions & 23 deletions cpp/src/arrow/compute/kernels/scalar_temporal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ struct DayOfWeek {
static T Call(KernelContext*, int64_t arg, Status*) {
return static_cast<T>(
weekday(year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))))
.iso_encoding());
.iso_encoding() -
1);
}
};

Expand All @@ -185,22 +186,27 @@ struct DayOfYear {
// Extract ISO Year values from timestamp
//
// First week of an ISO year has the majority (4 or more) of its days in January.
// To convert gregorian year to ISO year we add three days to gregorian date and
// take the gregorian year of resulting date as ISO year.
// Last week of an ISO year has the year's last Thursday in it.

template <typename Duration>
struct ISOYear {
template <typename T>
static T Call(KernelContext*, int64_t arg, Status*) {
return static_cast<T>(static_cast<const int32_t>(
year_month_day{floor<days>(sys_time<Duration>(Duration{arg})) + days{3}}.year()));
const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
auto y = year_month_day{t + days{3}}.year();
auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
if (t < start) {
--y;
}
return static_cast<T>(static_cast<int32_t>(y));
}
};

// ----------------------------------------------------------------------
// Extract ISO week from timestamp

//
// First week of an ISO year has the majority (4 or more) of its days in January.
// Last week of an ISO year has the year's last Thursday in it.
// Based on
// https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503
template <typename Duration>
Expand Down Expand Up @@ -262,8 +268,7 @@ struct Second {
template <typename T>
static T Call(KernelContext*, int64_t arg, Status*) {
Duration t = Duration{arg};
return static_cast<T>(
std::chrono::duration<double>(t - floor<std::chrono::minutes>(t)).count());
return static_cast<T>((t - floor<std::chrono::minutes>(t)) / std::chrono::seconds(1));
}
};

Expand All @@ -275,8 +280,8 @@ struct Subsecond {
template <typename T>
static T Call(KernelContext*, int64_t arg, Status*) {
Duration t = Duration{arg};
return static_cast<T>((t - floor<std::chrono::seconds>(t)) /
std::chrono::nanoseconds(1));
return static_cast<T>(
(std::chrono::duration<double>(t - floor<std::chrono::seconds>(t)).count()));
}
};

Expand Down Expand Up @@ -329,7 +334,7 @@ inline std::vector<int64_t> get_iso_calendar(int64_t arg) {
--y;
start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
}
return {static_cast<int64_t>(static_cast<int32_t>(ymd.year())),
return {static_cast<int64_t>(static_cast<int32_t>(y)),
static_cast<int64_t>(trunc<weeks>(t - start).count() + 1),
static_cast<int64_t>(weekday(ymd).iso_encoding())};
}
Expand Down Expand Up @@ -413,23 +418,27 @@ std::shared_ptr<ScalarFunction> MakeTemporal(std::string name, const FunctionDoc
auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);

for (auto unit : internal::AllTimeUnits()) {
InputType in_type(match::TimestampTypeUnit(unit));
InputType in_type{match::TimestampTypeUnit(unit)};
switch (unit) {
case TimeUnit::SECOND: {
auto exec = ScalarUnaryTemporal<Op<std::chrono::seconds>, OutType>::Exec;
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
break;
}
case TimeUnit::MILLI: {
auto exec = ScalarUnaryTemporal<Op<std::chrono::milliseconds>, OutType>::Exec;
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
break;
}
case TimeUnit::MICRO: {
auto exec = ScalarUnaryTemporal<Op<std::chrono::microseconds>, OutType>::Exec;
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
break;
}
case TimeUnit::NANO: {
auto exec = ScalarUnaryTemporal<Op<std::chrono::nanoseconds>, OutType>::Exec;
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
break;
}
}
}
Expand All @@ -439,31 +448,32 @@ std::shared_ptr<ScalarFunction> MakeTemporal(std::string name, const FunctionDoc
template <template <typename...> class Op>
std::shared_ptr<ScalarFunction> MakeSimpleTemporal(std::string name,
const FunctionDoc* doc) {
const auto& out_type = struct_({field("iso_year", int64()), field("iso_week", int64()),
field("day_of_week", int64())});
auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
auto output_type = struct_({field("iso_year", int64()), field("iso_week", int64()),
field("day_of_week", int64())});

for (auto unit : internal::AllTimeUnits()) {
InputType in_type{match::TimestampTypeUnit(unit)};
switch (unit) {
case TimeUnit::SECOND: {
auto exec = SimpleUnary<Op<std::chrono::seconds>>;
DCHECK_OK(func->AddKernel({match::TimestampTypeUnit(unit)},
OutputType(output_type), std::move(exec)));
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
break;
}
case TimeUnit::MILLI: {
auto exec = SimpleUnary<Op<std::chrono::milliseconds>>;
DCHECK_OK(func->AddKernel({match::TimestampTypeUnit(unit)},
OutputType(output_type), std::move(exec)));
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
break;
}
case TimeUnit::MICRO: {
auto exec = SimpleUnary<Op<std::chrono::microseconds>>;
DCHECK_OK(func->AddKernel({match::TimestampTypeUnit(unit)},
OutputType(output_type), std::move(exec)));
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
break;
}
case TimeUnit::NANO: {
auto exec = SimpleUnary<Op<std::chrono::nanoseconds>>;
DCHECK_OK(func->AddKernel({match::TimestampTypeUnit(unit)},
OutputType(output_type), std::move(exec)));
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
break;
}
}
}
Expand Down Expand Up @@ -607,7 +617,7 @@ void RegisterScalarTemporal(FunctionRegistry* registry) {
auto nanosecond = MakeTemporal<Nanosecond, Int64Type>("nanosecond", &nanosecond_doc);
DCHECK_OK(registry->AddFunction(std::move(nanosecond)));

auto subsecond = MakeTemporal<Subsecond, Int64Type>("subsecond", &subsecond_doc);
auto subsecond = MakeTemporal<Subsecond, DoubleType>("subsecond", &subsecond_doc);
DCHECK_OK(registry->AddFunction(std::move(subsecond)));
}

Expand Down
Loading

0 comments on commit 8dfe6c2

Please sign in to comment.