Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-13096: [C++] Implement logarithm compute functions #10567

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cpp/src/arrow/compute/api_scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,10 @@ SCALAR_ARITHMETIC_UNARY(Asin, "asin", "asin_checked")
SCALAR_ARITHMETIC_UNARY(Acos, "acos", "acos_checked")
SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked")
SCALAR_EAGER_UNARY(Atan, "atan")
// Logarithm wrappers: each invocation names the public function, its unchecked
// registry entry, and the matching "_checked" registry entry.
SCALAR_ARITHMETIC_UNARY(Ln, "ln", "ln_checked")
SCALAR_ARITHMETIC_UNARY(Log10, "log10", "log10_checked")
SCALAR_ARITHMETIC_UNARY(Log2, "log2", "log2_checked")
SCALAR_ARITHMETIC_UNARY(Log1p, "log1p", "log1p_checked")

#define SCALAR_ARITHMETIC_BINARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME) \
Result<Datum> NAME(const Datum& left, const Datum& right, ArithmeticOptions options, \
Expand Down
59 changes: 55 additions & 4 deletions cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,9 @@ class ARROW_EXPORT ProjectOptions : public FunctionOptions {

/// @}

/// \brief Get the absolute value of a value. Array values can be of arbitrary
/// length. If argument is null the result will be null.
/// \brief Get the absolute value of a value.
///
/// If argument is null the result will be null.
///
/// \param[in] arg the value transformed
/// \param[in] options arithmetic options (overflow handling), optional
Expand Down Expand Up @@ -311,8 +312,9 @@ Result<Datum> Divide(const Datum& left, const Datum& right,
ArithmeticOptions options = ArithmeticOptions(),
ExecContext* ctx = NULLPTR);

/// \brief Negate a value. Array values can be of arbitrary length. If argument
/// is null the result will be null.
/// \brief Negate values.
///
/// If argument is null the result will be null.
///
/// \param[in] arg the value negated
/// \param[in] options arithmetic options (overflow handling), optional
Expand Down Expand Up @@ -424,6 +426,55 @@ Result<Datum> Atan(const Datum& arg, ExecContext* ctx = NULLPTR);
ARROW_EXPORT
Result<Datum> Atan2(const Datum& y, const Datum& x, ExecContext* ctx = NULLPTR);

/// \brief Get the natural log of a value.
///
/// If argument is null the result will be null.
/// The unchecked kernel ("ln") returns -inf for zero and NaN for negative
/// values; the checked kernel ("ln_checked") reports an Invalid error for
/// non-positive values instead.
/// NOTE(review): the kernel is presumably selected by `options.check_overflow`
/// -- confirm against the SCALAR_ARITHMETIC_UNARY macro.
///
/// \param[in] arg The values to compute the logarithm for.
/// \param[in] options arithmetic options (overflow handling), optional
/// \param[in] ctx the function execution context, optional
/// \return the elementwise natural log
ARROW_EXPORT
Result<Datum> Ln(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
ExecContext* ctx = NULLPTR);

/// \brief Get the log base 10 of a value.
///
/// If argument is null the result will be null.
/// The unchecked kernel ("log10") returns -inf for zero and NaN for negative
/// values; the checked kernel ("log10_checked") reports an Invalid error for
/// non-positive values instead.
/// NOTE(review): the kernel is presumably selected by `options.check_overflow`
/// -- confirm against the SCALAR_ARITHMETIC_UNARY macro.
///
/// \param[in] arg The values to compute the logarithm for.
/// \param[in] options arithmetic options (overflow handling), optional
/// \param[in] ctx the function execution context, optional
/// \return the elementwise log base 10
ARROW_EXPORT
Result<Datum> Log10(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
ExecContext* ctx = NULLPTR);

/// \brief Get the log base 2 of a value.
///
/// If argument is null the result will be null.
/// The unchecked kernel ("log2") returns -inf for zero and NaN for negative
/// values; the checked kernel ("log2_checked") reports an Invalid error for
/// non-positive values instead.
/// NOTE(review): the kernel is presumably selected by `options.check_overflow`
/// -- confirm against the SCALAR_ARITHMETIC_UNARY macro.
///
/// \param[in] arg The values to compute the logarithm for.
/// \param[in] options arithmetic options (overflow handling), optional
/// \param[in] ctx the function execution context, optional
/// \return the elementwise log base 2
ARROW_EXPORT
Result<Datum> Log2(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
ExecContext* ctx = NULLPTR);

/// \brief Get the natural log of (1 + value).
///
/// If argument is null the result will be null.
/// This function may be more accurate than Ln(1 + value) for values close to zero.
/// The unchecked kernel ("log1p") returns -inf for -1 and NaN for values below
/// -1; the checked kernel ("log1p_checked") reports an Invalid error for values
/// <= -1 instead.
/// NOTE(review): the kernel is presumably selected by `options.check_overflow`
/// -- confirm against the SCALAR_ARITHMETIC_UNARY macro.
///
/// \param[in] arg The values to compute the logarithm for.
/// \param[in] options arithmetic options (overflow handling), optional
/// \param[in] ctx the function execution context, optional
/// \return the elementwise natural log of (1 + value)
ARROW_EXPORT
Result<Datum> Log1p(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
ExecContext* ctx = NULLPTR);

/// \brief Find the element-wise maximum of any number of arrays or scalars.
/// Array values must be the same length.
///
Expand Down
204 changes: 200 additions & 4 deletions cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ template <typename T>
using is_signed_integer =
std::integral_constant<bool, std::is_integral<T>::value && std::is_signed<T>::value>;

template <typename T>
using enable_if_signed_integer = enable_if_t<is_signed_integer<T>::value, T>;
template <typename T, typename R = T>
using enable_if_signed_integer = enable_if_t<is_signed_integer<T>::value, R>;

template <typename T>
using enable_if_unsigned_integer = enable_if_t<is_unsigned_integer<T>::value, T>;
template <typename T, typename R = T>
using enable_if_unsigned_integer = enable_if_t<is_unsigned_integer<T>::value, R>;

template <typename T, typename R = T>
using enable_if_integer =
Expand Down Expand Up @@ -686,6 +686,118 @@ struct Atan2 {
}
};

// Unchecked natural logarithm for floating point inputs.
// Negative inputs map to NaN and zero maps to -infinity; everything else
// (including NaN, which fails both comparisons) is forwarded to std::log.
struct LogNatural {
  template <typename T, typename Arg>
  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
    static_assert(std::is_same<T, Arg>::value, "");
    if (arg < 0.0) {
      return std::numeric_limits<T>::quiet_NaN();
    }
    if (arg == 0.0) {
      return -std::numeric_limits<T>::infinity();
    }
    return std::log(arg);
  }
};

// Checked natural logarithm for floating point inputs.
// For zero or negative inputs, stores an Invalid status in *st and returns the
// input unchanged; otherwise returns std::log(arg).
struct LogNaturalChecked {
  template <typename T, typename Arg>
  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
    static_assert(std::is_same<T, Arg>::value, "");
    if (arg < 0.0) {
      *st = Status::Invalid("logarithm of negative number");
      return arg;
    }
    if (arg == 0.0) {
      *st = Status::Invalid("logarithm of zero");
      return arg;
    }
    return std::log(arg);
  }
};

// Unchecked base-10 logarithm for floating point inputs.
// Negative inputs map to NaN and zero maps to -infinity; everything else
// (including NaN, which fails both comparisons) is forwarded to std::log10.
struct Log10 {
  template <typename T, typename Arg>
  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
    static_assert(std::is_same<T, Arg>::value, "");
    if (arg < 0.0) {
      return std::numeric_limits<T>::quiet_NaN();
    }
    if (arg == 0.0) {
      return -std::numeric_limits<T>::infinity();
    }
    return std::log10(arg);
  }
};

// Checked base-10 logarithm for floating point inputs.
// For zero or negative inputs, stores an Invalid status in *st and returns the
// input unchanged; otherwise returns std::log10(arg).
struct Log10Checked {
  template <typename T, typename Arg>
  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
    static_assert(std::is_same<T, Arg>::value, "");
    if (arg < 0) {
      *st = Status::Invalid("logarithm of negative number");
      return arg;
    }
    if (arg == 0) {
      *st = Status::Invalid("logarithm of zero");
      return arg;
    }
    return std::log10(arg);
  }
};

// Unchecked base-2 logarithm for floating point inputs.
// Negative inputs map to NaN and zero maps to -infinity; everything else
// (including NaN, which fails both comparisons) is forwarded to std::log2.
struct Log2 {
  template <typename T, typename Arg>
  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
    static_assert(std::is_same<T, Arg>::value, "");
    if (arg < 0.0) {
      return std::numeric_limits<T>::quiet_NaN();
    }
    if (arg == 0.0) {
      return -std::numeric_limits<T>::infinity();
    }
    return std::log2(arg);
  }
};

// Checked base-2 logarithm for floating point inputs.
// For zero or negative inputs, stores an Invalid status in *st and returns the
// input unchanged; otherwise returns std::log2(arg).
struct Log2Checked {
  template <typename T, typename Arg>
  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
    static_assert(std::is_same<T, Arg>::value, "");
    if (arg < 0.0) {
      *st = Status::Invalid("logarithm of negative number");
      return arg;
    }
    if (arg == 0.0) {
      *st = Status::Invalid("logarithm of zero");
      return arg;
    }
    return std::log2(arg);
  }
};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if that's worth it, but these three kernels have very similar implementations, maybe something could be shared. Or perhaps that's pointless generalization.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could make it templated on two functions (one for float, one for double)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps, or on a struct defining those two functions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Feel free to do it or not, in any case. This can also be merged as-is :-)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it'll save us very much here (and our arithmetic functions are all written out instead of being templated).


// Unchecked log(1 + x) for floating point inputs.
// Inputs below -1 map to NaN and exactly -1 maps to -infinity; everything else
// (including NaN, which fails both comparisons) is forwarded to std::log1p.
struct Log1p {
  template <typename T, typename Arg>
  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
    static_assert(std::is_same<T, Arg>::value, "");
    if (arg < -1) {
      return std::numeric_limits<T>::quiet_NaN();
    }
    if (arg == -1) {
      return -std::numeric_limits<T>::infinity();
    }
    return std::log1p(arg);
  }
};

// Checked log(1 + x) for floating point inputs.
// For inputs <= -1 (where 1 + x is zero or negative), stores an Invalid status
// in *st and returns the input unchanged; otherwise returns std::log1p(arg).
struct Log1pChecked {
  template <typename T, typename Arg>
  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
    static_assert(std::is_same<T, Arg>::value, "");
    if (arg < -1) {
      *st = Status::Invalid("logarithm of negative number");
      return arg;
    }
    if (arg == -1) {
      *st = Status::Invalid("logarithm of zero");
      return arg;
    }
    return std::log1p(arg);
  }
};

// Generate a kernel given an arithmetic functor
template <template <typename... Args> class KernelGenerator, typename Op>
ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
Expand Down Expand Up @@ -1295,6 +1407,60 @@ const FunctionDoc atan2_doc{
"Compute the inverse tangent using argument signs to determine the quadrant",
("Integer arguments return double values."),
{"y", "x"}};

// User-facing documentation for the unchecked "ln" function: summary,
// description, and argument names.
const FunctionDoc ln_doc{
"Compute natural log of arguments element-wise",
("Non-positive values return -inf or NaN. Null values return null.\n"
"Use function \"ln_checked\" if you want non-positive values to raise an error."),
{"x"}};

// User-facing documentation for the checked "ln_checked" function: summary,
// description, and argument names. The checked kernel reports an error for
// non-positive inputs, so the description must say so (it previously repeated
// the unchecked function's "-inf or NaN" wording).
const FunctionDoc ln_checked_doc{
    "Compute natural log of arguments element-wise",
    ("Non-positive values raise an error. Null values return null.\n"
     "Use function \"ln\" if you want non-positive values to return "
     "-inf or NaN."),
    {"x"}};

// User-facing documentation for the unchecked "log10" function: summary,
// description, and argument names.
const FunctionDoc log10_doc{
"Compute log base 10 of arguments element-wise",
("Non-positive values return -inf or NaN. Null values return null.\n"
"Use function \"log10_checked\" if you want non-positive values to raise an error."),
{"x"}};

// User-facing documentation for the checked "log10_checked" function: summary,
// description, and argument names. The checked kernel reports an error for
// non-positive inputs, so the description must say so (it previously repeated
// the unchecked function's "-inf or NaN" wording).
const FunctionDoc log10_checked_doc{
    "Compute log base 10 of arguments element-wise",
    ("Non-positive values raise an error. Null values return null.\n"
     "Use function \"log10\" if you want non-positive values to return "
     "-inf or NaN."),
    {"x"}};

// User-facing documentation for the unchecked "log2" function: summary,
// description, and argument names.
const FunctionDoc log2_doc{
"Compute log base 2 of arguments element-wise",
("Non-positive values return -inf or NaN. Null values return null.\n"
"Use function \"log2_checked\" if you want non-positive values to raise an error."),
{"x"}};

// User-facing documentation for the checked "log2_checked" function: summary,
// description, and argument names. The checked kernel reports an error for
// non-positive inputs, so the description must say so (it previously repeated
// the unchecked function's "-inf or NaN" wording).
const FunctionDoc log2_checked_doc{
    "Compute log base 2 of arguments element-wise",
    ("Non-positive values raise an error. Null values return null.\n"
     "Use function \"log2\" if you want non-positive values to return "
     "-inf or NaN."),
    {"x"}};

// User-facing documentation for the unchecked "log1p" function: summary,
// description, and argument names.
// Fixes: a newline now separates the precision note from the "Use function"
// sentence (the adjacent literals previously rendered as "zero.Use"), and the
// error domain is stated as values <= -1 rather than "non-positive values".
const FunctionDoc log1p_doc{
    "Compute natural log of (1+x) element-wise",
    ("Values <= -1 return -inf or NaN. Null values return null.\n"
     "This function may be more precise than log(1 + x) for x close to zero.\n"
     "Use function \"log1p_checked\" if you want values <= -1 to raise an "
     "error."),
    {"x"}};

// User-facing documentation for the checked "log1p_checked" function: summary,
// description, and argument names.
// Fixes: the checked kernel reports an error for values <= -1 (the text
// previously claimed -inf or NaN), a newline now separates the precision note
// from the "Use function" sentence, and the domain is stated as values <= -1
// rather than "non-positive values".
const FunctionDoc log1p_checked_doc{
    "Compute natural log of (1+x) element-wise",
    ("Values <= -1 raise an error. Null values return null.\n"
     "This function may be more precise than log(1 + x) for x close to zero.\n"
     "Use function \"log1p\" if you want values <= -1 to return "
     "-inf or NaN."),
    {"x"}};
} // namespace

void RegisterScalarArithmetic(FunctionRegistry* registry) {
Expand Down Expand Up @@ -1460,6 +1626,36 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {

auto atan2 = MakeArithmeticFunctionFloatingPoint<Atan2>("atan2", &atan2_doc);
DCHECK_OK(registry->AddFunction(std::move(atan2)));

// ----------------------------------------------------------------------
// Logarithms
auto ln = MakeUnaryArithmeticFunctionFloatingPoint<LogNatural>("ln", &ln_doc);
DCHECK_OK(registry->AddFunction(std::move(ln)));

auto ln_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<LogNaturalChecked>(
"ln_checked", &ln_checked_doc);
DCHECK_OK(registry->AddFunction(std::move(ln_checked)));

auto log10 = MakeUnaryArithmeticFunctionFloatingPoint<Log10>("log10", &log10_doc);
DCHECK_OK(registry->AddFunction(std::move(log10)));

auto log10_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log10Checked>(
"log10_checked", &log10_checked_doc);
DCHECK_OK(registry->AddFunction(std::move(log10_checked)));

auto log2 = MakeUnaryArithmeticFunctionFloatingPoint<Log2>("log2", &log2_doc);
DCHECK_OK(registry->AddFunction(std::move(log2)));

auto log2_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log2Checked>(
"log2_checked", &log2_checked_doc);
DCHECK_OK(registry->AddFunction(std::move(log2_checked)));

auto log1p = MakeUnaryArithmeticFunctionFloatingPoint<Log1p>("log1p", &log1p_doc);
DCHECK_OK(registry->AddFunction(std::move(log1p)));

auto log1p_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log1pChecked>(
"log1p_checked", &log1p_checked_doc);
DCHECK_OK(registry->AddFunction(std::move(log1p_checked)));
}

} // namespace internal
Expand Down
60 changes: 60 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1821,5 +1821,65 @@ TYPED_TEST(TestBinaryArithmeticFloating, TrigAtan2) {
-M_PI_2, 0, M_PI));
}

// Exercises Ln, Log10, Log2 and Log1p on floating point inputs, with the
// overflow check both disabled and enabled.
TYPED_TEST(TestUnaryArithmeticFloating, Log) {
  using CType = typename TestFixture::CType;
  // NaN appears in both inputs and expected outputs below.
  this->SetNansEqual(true);
  auto min_val = std::numeric_limits<CType>::min();
  auto max_val = std::numeric_limits<CType>::max();

  // In-domain inputs must give the same results in either mode.
  for (auto check_overflow : {false, true}) {
    this->SetOverflowCheck(check_overflow);
    this->AssertUnaryOp(Ln, "[1, 2.718281828459045, null, NaN, Inf]",
                        "[0, 1, null, NaN, Inf]");
    // N.B. min() for float types is smallest normal number > 0
    this->AssertUnaryOp(Ln, min_val, std::log(min_val));
    this->AssertUnaryOp(Ln, max_val, std::log(max_val));
    this->AssertUnaryOp(Log10, "[1, 10, null, NaN, Inf]", "[0, 1, null, NaN, Inf]");
    this->AssertUnaryOp(Log10, min_val, std::log10(min_val));
    this->AssertUnaryOp(Log10, max_val, std::log10(max_val));
    this->AssertUnaryOp(Log2, "[1, 2, null, NaN, Inf]", "[0, 1, null, NaN, Inf]");
    this->AssertUnaryOp(Log2, min_val, std::log2(min_val));
    this->AssertUnaryOp(Log2, max_val, std::log2(max_val));
    this->AssertUnaryOp(Log1p, "[0, 1.718281828459045, null, NaN, Inf]",
                        "[0, 1, null, NaN, Inf]");
    this->AssertUnaryOp(Log1p, min_val, std::log1p(min_val));
    this->AssertUnaryOp(Log1p, max_val, std::log1p(max_val));
  }

  // Check disabled: out-of-domain inputs produce NaN or -Inf, not errors.
  this->SetOverflowCheck(false);
  this->AssertUnaryOp(Ln, "[-Inf, -1, 0, Inf]", "[NaN, NaN, -Inf, Inf]");
  this->AssertUnaryOp(Log10, "[-Inf, -1, 0, Inf]", "[NaN, NaN, -Inf, Inf]");
  this->AssertUnaryOp(Log2, "[-Inf, -1, 0, Inf]", "[NaN, NaN, -Inf, Inf]");
  this->AssertUnaryOp(Log1p, "[-Inf, -2, -1, Inf]", "[NaN, NaN, -Inf, Inf]");

  // Check enabled: out-of-domain inputs must raise Invalid.
  this->SetOverflowCheck(true);
  this->AssertUnaryOpRaises(Ln, "[0]", "logarithm of zero");
  this->AssertUnaryOpRaises(Ln, "[-1]", "logarithm of negative number");
  this->AssertUnaryOpRaises(Ln, "[-Inf]", "logarithm of negative number");

  auto lowest_val = MakeScalar(std::numeric_limits<CType>::lowest());
  // N.B. RapidJSON on some platforms raises "Number too big to be stored in double" so
  // don't bounce through JSON
  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("logarithm of negative number"),
                                  Ln(lowest_val, this->options_));
  this->AssertUnaryOpRaises(Log10, "[0]", "logarithm of zero");
  this->AssertUnaryOpRaises(Log10, "[-1]", "logarithm of negative number");
  this->AssertUnaryOpRaises(Log10, "[-Inf]", "logarithm of negative number");
  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("logarithm of negative number"),
                                  Log10(lowest_val, this->options_));
  this->AssertUnaryOpRaises(Log2, "[0]", "logarithm of zero");
  this->AssertUnaryOpRaises(Log2, "[-1]", "logarithm of negative number");
  this->AssertUnaryOpRaises(Log2, "[-Inf]", "logarithm of negative number");
  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("logarithm of negative number"),
                                  Log2(lowest_val, this->options_));
  // For Log1p the domain boundary is -1 (where 1 + x == 0).
  this->AssertUnaryOpRaises(Log1p, "[-1]", "logarithm of zero");
  this->AssertUnaryOpRaises(Log1p, "[-2]", "logarithm of negative number");
  this->AssertUnaryOpRaises(Log1p, "[-Inf]", "logarithm of negative number");
  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("logarithm of negative number"),
                                  Log1p(lowest_val, this->options_));
}

Copy link
Contributor

@edponce edponce Jun 22, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In valid test cases, no need to use ty (aka this->type_singleton()) with ArrayFromJSON because that is the default type.

Moreover, PR #10395 extends the unary scalar arithmetic test class to support combinations of JSON/Array inputs which will allow you to further simplify the test statements as follows:

  • For integer inputs: AssertUnaryOp(Ln, "[1]", ArrayFromJSON(float64(), "[0]"));
  • For floating point inputs: AssertUnaryOp(Log10, "[1, 10]", "[0, 1]");

Copy link
Contributor

@edponce edponce Jun 22, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add test cases with Inf, NaN, null, min, max inputs.

For min/max you can refer to the tests of AbsoluteValue.

} // namespace compute
} // namespace arrow
Loading