Skip to content

Commit

Permalink
move Floor, Gcd, Lcm, Pi to datafusion-functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Omega359 committed Apr 6, 2024
1 parent d201ec7 commit 118caff
Show file tree
Hide file tree
Showing 21 changed files with 588 additions and 343 deletions.
29 changes: 1 addition & 28 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,12 @@ pub enum BuiltinScalarFunction {
Exp,
/// factorial
Factorial,
/// floor
Floor,
/// gcd, Greatest common divisor
Gcd,
/// lcm, Least common multiple
Lcm,
/// iszero
Iszero,
/// log, same as log10
Log,
/// nanvl
Nanvl,
/// pi
Pi,
/// power
Power,
/// round
Expand Down Expand Up @@ -135,13 +127,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Coalesce => Volatility::Immutable,
BuiltinScalarFunction::Exp => Volatility::Immutable,
BuiltinScalarFunction::Factorial => Volatility::Immutable,
BuiltinScalarFunction::Floor => Volatility::Immutable,
BuiltinScalarFunction::Gcd => Volatility::Immutable,
BuiltinScalarFunction::Iszero => Volatility::Immutable,
BuiltinScalarFunction::Lcm => Volatility::Immutable,
BuiltinScalarFunction::Log => Volatility::Immutable,
BuiltinScalarFunction::Nanvl => Volatility::Immutable,
BuiltinScalarFunction::Pi => Volatility::Immutable,
BuiltinScalarFunction::Power => Volatility::Immutable,
BuiltinScalarFunction::Round => Volatility::Immutable,
BuiltinScalarFunction::Cot => Volatility::Immutable,
Expand Down Expand Up @@ -183,13 +171,10 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::InitCap => {
utf8_to_str_type(&input_expr_types[0], "initcap")
}
BuiltinScalarFunction::Pi => Ok(Float64),
BuiltinScalarFunction::Random => Ok(Float64),
BuiltinScalarFunction::EndsWith => Ok(Boolean),

BuiltinScalarFunction::Factorial
| BuiltinScalarFunction::Gcd
| BuiltinScalarFunction::Lcm => Ok(Int64),
BuiltinScalarFunction::Factorial => Ok(Int64),

BuiltinScalarFunction::Power => match &input_expr_types[0] {
Int64 => Ok(Int64),
Expand All @@ -210,7 +195,6 @@ impl BuiltinScalarFunction {

BuiltinScalarFunction::Ceil
| BuiltinScalarFunction::Exp
| BuiltinScalarFunction::Floor
| BuiltinScalarFunction::Round
| BuiltinScalarFunction::Trunc
| BuiltinScalarFunction::Cot => match input_expr_types[0] {
Expand Down Expand Up @@ -248,7 +232,6 @@ impl BuiltinScalarFunction {
],
self.volatility(),
),
BuiltinScalarFunction::Pi => Signature::exact(vec![], self.volatility()),
BuiltinScalarFunction::Random => Signature::exact(vec![], self.volatility()),
BuiltinScalarFunction::Power => Signature::one_of(
vec![Exact(vec![Int64, Int64]), Exact(vec![Float64, Float64])],
Expand Down Expand Up @@ -289,12 +272,8 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Factorial => {
Signature::uniform(1, vec![Int64], self.volatility())
}
BuiltinScalarFunction::Gcd | BuiltinScalarFunction::Lcm => {
Signature::uniform(2, vec![Int64], self.volatility())
}
BuiltinScalarFunction::Ceil
| BuiltinScalarFunction::Exp
| BuiltinScalarFunction::Floor
| BuiltinScalarFunction::Cot => {
// math expressions expect 1 argument of type f64 or f32
// priority is given to f64 because e.g. `sqrt(1i32)` is in IR (real numbers) and thus we
Expand All @@ -319,10 +298,8 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Ceil
| BuiltinScalarFunction::Exp
| BuiltinScalarFunction::Factorial
| BuiltinScalarFunction::Floor
| BuiltinScalarFunction::Round
| BuiltinScalarFunction::Trunc
| BuiltinScalarFunction::Pi
) {
Some(vec![Some(true)])
} else if *self == BuiltinScalarFunction::Log {
Expand All @@ -339,13 +316,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Cot => &["cot"],
BuiltinScalarFunction::Exp => &["exp"],
BuiltinScalarFunction::Factorial => &["factorial"],
BuiltinScalarFunction::Floor => &["floor"],
BuiltinScalarFunction::Gcd => &["gcd"],
BuiltinScalarFunction::Iszero => &["iszero"],
BuiltinScalarFunction::Lcm => &["lcm"],
BuiltinScalarFunction::Log => &["log"],
BuiltinScalarFunction::Nanvl => &["nanvl"],
BuiltinScalarFunction::Pi => &["pi"],
BuiltinScalarFunction::Power => &["power", "pow"],
BuiltinScalarFunction::Random => &["random"],
BuiltinScalarFunction::Round => &["round"],
Expand Down
17 changes: 1 addition & 16 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,11 +298,6 @@ pub fn concat_ws(sep: Expr, values: Vec<Expr>) -> Expr {
))
}

/// Returns an approximate value of π
pub fn pi() -> Expr {
Expr::ScalarFunction(ScalarFunction::new(BuiltinScalarFunction::Pi, vec![]))
}

/// Returns a random value in the range 0.0 <= x < 1.0
pub fn random() -> Expr {
Expr::ScalarFunction(ScalarFunction::new(BuiltinScalarFunction::Random, vec![]))
Expand Down Expand Up @@ -538,12 +533,6 @@ macro_rules! nary_scalar_expr {
// math functions
scalar_expr!(Cot, cot, num, "cotangent of a number");
scalar_expr!(Factorial, factorial, num, "factorial");
scalar_expr!(
Floor,
floor,
num,
"nearest integer less than or equal to argument"
);
scalar_expr!(
Ceil,
ceil,
Expand All @@ -557,8 +546,7 @@ nary_scalar_expr!(
"truncate toward zero, with optional precision"
);
scalar_expr!(Exp, exp, num, "exponential");
scalar_expr!(Gcd, gcd, arg_1 arg_2, "greatest common divisor");
scalar_expr!(Lcm, lcm, arg_1 arg_2, "least common multiple");

scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`");
scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`");

Expand Down Expand Up @@ -1058,7 +1046,6 @@ mod test {
fn scalar_function_definitions() {
test_unary_scalar_expr!(Cot, cot);
test_unary_scalar_expr!(Factorial, factorial);
test_unary_scalar_expr!(Floor, floor);
test_unary_scalar_expr!(Ceil, ceil);
test_nary_scalar_expr!(Round, round, input);
test_nary_scalar_expr!(Round, round, input, decimal_places);
Expand All @@ -1068,8 +1055,6 @@ mod test {
test_scalar_expr!(Nanvl, nanvl, x, y);
test_scalar_expr!(Iszero, iszero, input);

test_scalar_expr!(Gcd, gcd, arg_1, arg_2);
test_scalar_expr!(Lcm, lcm, arg_1, arg_2);
test_scalar_expr!(InitCap, initcap, string);
test_scalar_expr!(EndsWith, ends_with, string, characters);
}
Expand Down
145 changes: 145 additions & 0 deletions datafusion/functions/src/math/gcd.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow::array::{ArrayRef, Int64Array};
use std::any::Any;
use std::mem::swap;
use std::sync::Arc;

use arrow::datatypes::DataType;
use arrow::datatypes::DataType::Int64;

use crate::utils::make_scalar_function;
use datafusion_common::{exec_err, DataFusionError, Result};
use datafusion_expr::ColumnarValue;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};

#[derive(Debug)]
pub struct GcdFunc {
signature: Signature,
}

impl Default for GcdFunc {
fn default() -> Self {
Self::new()
}
}

impl GcdFunc {
pub fn new() -> Self {
use DataType::*;
Self {
signature: Signature::uniform(2, vec![Int64], Volatility::Immutable),
}
}
}

impl ScalarUDFImpl for GcdFunc {
    /// Returns `self` as [`Any`] so callers can downcast to `GcdFunc`.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// The name this function is registered under.
    fn name(&self) -> &str {
        "gcd"
    }

    /// The signature built in [`GcdFunc::new`].
    fn signature(&self) -> &Signature {
        &self.signature
    }

    /// The result type is always `Int64`; the argument types are not inspected.
    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
        Ok(Int64)
    }

    /// Evaluates `gcd` over the given arguments.
    ///
    /// The array-based [`gcd`] kernel is wrapped by `make_scalar_function`
    /// (called with an empty second argument) and the wrapper is applied to `args`.
    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
        let kernel = make_scalar_function(gcd, vec![]);
        kernel(args)
    }
}

/// Array kernel for the SQL `gcd` function.
///
/// Dispatches on the data type of `args[0]`. For `Int64`, `args[0]` ("x") and
/// `args[1]` ("y") are handed to `make_function_inputs2!` together with
/// [`compute_gcd`], and the resulting `Int64Array` is returned as an [`ArrayRef`].
///
/// # Errors
///
/// Returns an execution error when the first argument is not `Int64`.
fn gcd(args: &[ArrayRef]) -> Result<ArrayRef> {
    match args[0].data_type() {
        Int64 => {
            let values = make_function_inputs2!(
                &args[0],
                &args[1],
                "x",
                "y",
                Int64Array,
                Int64Array,
                { compute_gcd }
            );
            Ok(Arc::new(values) as ArrayRef)
        }
        other => exec_err!("Unsupported data type {other:?} for function gcd"),
    }
}

/// Computes the greatest common divisor of `x` and `y` using the binary GCD algorithm.
///
/// The result is the GCD of the magnitudes `|x|` and `|y|`, so it is non-negative
/// whenever it fits in an `i64`. `compute_gcd(0, 0)` is `0`.
///
/// The only magnitude that does not fit is `2^63`, which arises when both inputs
/// are drawn from `{0, i64::MIN}` and at least one is `i64::MIN`. In that case the
/// value wraps and `i64::MIN` is returned.
pub fn compute_gcd(x: i64, y: i64) -> i64 {
    // Work on unsigned magnitudes. `i64::MIN.wrapping_abs()` is still negative, and a
    // negative operand keeps the subtract-and-shift loop below from ever reaching zero.
    let mut a = x.unsigned_abs();
    let mut b = y.unsigned_abs();

    let magnitude = if a == 0 {
        b
    } else if b == 0 {
        a
    } else {
        // Factor out the power of two shared by both operands; it is restored at the end.
        let shift = (a | b).trailing_zeros();
        a >>= shift;
        b >>= shift;
        // Make `a` odd; it stays odd for the rest of the loop.
        a >>= a.trailing_zeros();

        loop {
            // Remaining factors of two in `b` cannot be part of the GCD.
            b >>= b.trailing_zeros();
            if a > b {
                swap(&mut a, &mut b);
            }

            // `b >= a` here, so the unsigned subtraction cannot underflow.
            b -= a;

            if b == 0 {
                break a << shift;
            }
        }
    };

    // `magnitude` is at most 2^63; only that single value is out of range for i64.
    i64::try_from(magnitude).unwrap_or(i64::MIN)
}

#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow::array::{ArrayRef, Int64Array};

    use crate::math::gcd::gcd;
    use datafusion_common::cast::as_int64_array;

    /// Runs the `gcd` kernel over paired `Int64` columns and checks each row.
    #[test]
    fn test_gcd_i64() {
        let x: ArrayRef = Arc::new(Int64Array::from(vec![0, 3, 25, -16]));
        let y: ArrayRef = Arc::new(Int64Array::from(vec![0, -2, 15, 8]));
        let expected: [i64; 4] = [0, 1, 5, 8];

        let result = gcd(&[x, y]).expect("failed to initialize function gcd");
        let ints = as_int64_array(&result).expect("failed to initialize function gcd");

        assert_eq!(ints.len(), 4);
        for (row, want) in expected.iter().enumerate() {
            assert_eq!(ints.value(row), *want);
        }
    }
}
Loading

0 comments on commit 118caff

Please sign in to comment.