Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add support for numpy expm1, log1p, floor, ceil, arctan2 ops #505

Merged
merged 2 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,13 @@ def arctan_op_impl(x: ibis_types.Value):
return typing.cast(ibis_types.NumericValue, x).atan()


@scalar_op_compiler.register_binary_op(ops.arctan2_op)
def arctan2_op_impl(x: ibis_types.Value, y: ibis_types.Value):
    """Compile ``arctan2`` as ``x.atan2(y)`` on the two numeric operands."""
    first = typing.cast(ibis_types.NumericValue, x)
    second = typing.cast(ibis_types.NumericValue, y)
    return first.atan2(second)


# Hyperbolic trig functions
# BQ has these functions, but Ibis doesn't
@scalar_op_compiler.register_unary_op(ops.sinh_op)
Expand Down Expand Up @@ -319,6 +326,30 @@ def arctanh_op_impl(x: ibis_types.Value):


# Numeric Ops
@scalar_op_compiler.register_unary_op(ops.floor_op)
def floor_op_impl(x: ibis_types.Value):
    """Compile ``floor``, dispatching on the operand's ibis type.

    Integer inputs are cast to Float64, floating inputs go through the
    ``float_floor`` builtin, and any other numeric type uses ibis' ``floor()``.
    """
    value = typing.cast(ibis_types.NumericValue, x)
    dtype = value.type()
    if dtype.is_integer():
        return value.cast(ibis_dtypes.Float64())
    if not dtype.is_floating():
        # Remaining (non-integer, non-floating) numeric types
        return value.floor()
    # Default ibis impl tries to cast to integer, which doesn't match pandas and can overflow
    return float_floor(value)


@scalar_op_compiler.register_unary_op(ops.ceil_op)
def ceil_op_impl(x: ibis_types.Value):
    """Compile ``ceil``, dispatching on the operand's ibis type.

    Integer inputs are cast to Float64, floating inputs go through the
    ``float_ceil`` builtin, and any other numeric type uses ibis' ``ceil()``.
    """
    value = typing.cast(ibis_types.NumericValue, x)
    dtype = value.type()
    if dtype.is_integer():
        return value.cast(ibis_dtypes.Float64())
    if not dtype.is_floating():
        # Remaining (non-integer, non-floating) numeric types
        return value.ceil()
    # Default ibis impl tries to cast to integer, which doesn't match pandas and can overflow
    return float_ceil(value)


@scalar_op_compiler.register_unary_op(ops.abs_op)
def abs_op_impl(x: ibis_types.Value):
    """Compile ``abs`` through the ibis numeric ``abs()`` method."""
    numeric_value = typing.cast(ibis_types.NumericValue, x)
    return numeric_value.abs()
Expand Down Expand Up @@ -347,13 +378,23 @@ def ln_op_impl(x: ibis_types.Value):
return (~domain).ifelse(out_of_domain, numeric_value.ln())


@scalar_op_compiler.register_unary_op(ops.log1p_op)
def log1p_op_impl(x: ibis_types.Value):
    """Compile ``log1p`` by handing ``1 + x`` to ``ln_op_impl``."""
    shifted = _ibis_num(1) + x
    return ln_op_impl(shifted)


@scalar_op_compiler.register_unary_op(ops.exp_op)
def exp_op_impl(x: ibis_types.Value):
    """Compile ``exp``, yielding ``_INF`` when the input is not below the bound.

    The condition is kept in its negated form so that the choice of branch
    is exactly the same as before for every value of the comparison.
    """
    value = typing.cast(ibis_types.NumericValue, x)
    below_bound = value < _FLOAT64_EXP_BOUND
    exponential = value.exp()
    return (~below_bound).ifelse(_INF, exponential)


@scalar_op_compiler.register_unary_op(ops.expm1_op)
def expm1_op_impl(x: ibis_types.Value):
    """Compile ``expm1`` as the result of ``exp_op_impl`` minus one."""
    exponential = exp_op_impl(x)
    return exponential - _ibis_num(1)


@scalar_op_compiler.register_unary_op(ops.invert_op)
def invert_op_impl(x: ibis_types.Value):
    """Compile the invert op by calling ``negate()`` on the numeric operand."""
    # NOTE(review): negate() is used for "invert" here; confirm this gives
    # the intended result for every input type this op accepts.
    numeric_value = typing.cast(ibis_types.NumericValue, x)
    return numeric_value.negate()
Expand Down Expand Up @@ -1318,3 +1359,16 @@ def _ibis_num(number: float):
@ibis.udf.scalar.builtin
def timestamp(a: str) -> ibis_dtypes.timestamp:
    """Convert string to timestamp.

    Declared through ``ibis.udf.scalar.builtin``; the function has no Python
    body beyond this docstring. Presumably the call resolves to the backend
    function of the same name (confirm against the ibis UDF documentation).
    """


# Need these because ibis otherwise tries to do casts to int that can fail
@ibis.udf.scalar.builtin(name="floor")
def float_floor(a: float) -> float:
    """Apply the backend's builtin ``floor`` to a float, returning a float.

    Declared through ``ibis.udf.scalar.builtin`` with ``name="floor"`` and a
    float-to-float signature, so no integer cast is involved. The ``return 0``
    below is only a placeholder body and is excluded from coverage.
    """
    return 0  # pragma: NO COVER


@ibis.udf.scalar.builtin(name="ceil")
def float_ceil(a: float) -> float:
    """Apply the backend's builtin ``ceil`` to a float, returning a float.

    Declared through ``ibis.udf.scalar.builtin`` with ``name="ceil"`` and a
    float-to-float signature, so no integer cast is involved. The ``return 0``
    below is only a placeholder body and is excluded from coverage.
    """
    return 0  # pragma: NO COVER
10 changes: 10 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,11 +246,16 @@ def create_ternary_op(
# Inverse hyperbolic ops: unary, declared with the REAL_NUMERIC type rule.
arcsinh_op = create_unary_op(name="arcsinh", type_rule=op_typing.REAL_NUMERIC)
arccosh_op = create_unary_op(name="arccosh", type_rule=op_typing.REAL_NUMERIC)
arctanh_op = create_unary_op(name="arctanh", type_rule=op_typing.REAL_NUMERIC)
# arctan2 takes two operands, so it is created as a binary op.
arctan2_op = create_binary_op(name="arctan2", type_rule=op_typing.REAL_NUMERIC)
## Numeric Ops
floor_op = create_unary_op(name="floor", type_rule=op_typing.REAL_NUMERIC)
ceil_op = create_unary_op(name="ceil", type_rule=op_typing.REAL_NUMERIC)
# abs is the only op in this group declared with INPUT_TYPE instead of REAL_NUMERIC.
abs_op = create_unary_op(name="abs", type_rule=op_typing.INPUT_TYPE)
exp_op = create_unary_op(name="exp", type_rule=op_typing.REAL_NUMERIC)
expm1_op = create_unary_op(name="expm1", type_rule=op_typing.REAL_NUMERIC)
# NOTE: the variable is ln_op, but the op itself is registered under the name "log".
ln_op = create_unary_op(name="log", type_rule=op_typing.REAL_NUMERIC)
log10_op = create_unary_op(name="log10", type_rule=op_typing.REAL_NUMERIC)
log1p_op = create_unary_op(name="log1p", type_rule=op_typing.REAL_NUMERIC)
sqrt_op = create_unary_op(name="sqrt", type_rule=op_typing.REAL_NUMERIC)


Expand Down Expand Up @@ -540,6 +545,10 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
np.log10: log10_op,
np.sqrt: sqrt_op,
np.abs: abs_op,
np.floor: floor_op,
np.ceil: ceil_op,
np.log1p: log1p_op,
np.expm1: expm1_op,
}


Expand All @@ -549,4 +558,5 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
np.multiply: mul_op,
np.divide: div_op,
np.power: pow_op,
np.arctan2: arctan2_op,
}
22 changes: 22 additions & 0 deletions tests/system/small/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ def test_series_ufuncs(floats_pd, floats_bf, opname):
("log10",),
("sqrt",),
("abs",),
("floor",),
("ceil",),
("expm1",),
("log1p",),
],
)
def test_df_ufuncs(scalars_dfs, opname):
Expand All @@ -77,6 +81,7 @@ def test_df_ufuncs(scalars_dfs, opname):
("multiply",),
("divide",),
("power",),
("arctan2",),
],
)
def test_series_binary_ufuncs(floats_product_pd, floats_product_bf, opname):
Expand Down Expand Up @@ -112,6 +117,23 @@ def test_df_binary_ufuncs(scalars_dfs, opname):
pd.testing.assert_frame_equal(bf_result, pd_result)


@pytest.mark.parametrize(
    ("x", "y"),
    [
        ("int64_col", "int64_col"),
        ("float64_col", "int64_col"),
    ],
)
def test_series_atan2(scalars_dfs, x, y):
    """Check ``np.arctan2`` on two series gives the same result as pandas."""
    # Test atan2 separately as pandas errors when passing entire df as input, so pass only series
    bf_df, pd_df = scalars_dfs

    expected = np.arctan2(pd_df[x], pd_df[y])
    actual = np.arctan2(bf_df[x], bf_df[y]).to_pandas()

    pd.testing.assert_series_equal(actual, expected)


def test_series_binary_ufuncs_reverse(scalars_dfs):
scalars_df, scalars_pandas_df = scalars_dfs

Expand Down