Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deps: update ibis to version 8.0.0 and refactor remote_function to use ibis UDF method #277

Merged
merged 44 commits into main from tswast-ibis-udf
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from 43 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
8929fef
deps: migrate to `ibis-framework >= "7.0.0"`
tswast Sep 22, 2023
a4e995d
use dtype instead of output_dtype in custom ops
tswast Sep 22, 2023
8fd8fd4
adjust type annotations
tswast Sep 22, 2023
ba3c5c4
Merge remote-tracking branch 'origin/main' into b301459557-ibis-prere…
tswast Oct 13, 2023
f1ce176
Merge branch 'main' into b301459557-ibis-prerelease
tswast Nov 2, 2023
14fb7c8
Update noxfile.py
tswast Nov 13, 2023
ea8d3e5
Merge branch 'main' into b301459557-ibis-prerelease
tswast Nov 13, 2023
191df69
Merge remote-tracking branch 'origin/main' into b301459557-ibis-prere…
tswast Nov 13, 2023
b182453
update type annotations
tswast Nov 13, 2023
174e735
Merge remote-tracking branch 'origin/main' into b301459557-ibis-prere…
tswast Nov 27, 2023
0ab94cc
fix for deferred values
tswast Nov 27, 2023
4931923
fix prerelease
tswast Nov 27, 2023
9df0816
minimum 7.1.0
tswast Nov 27, 2023
71f4889
mypy
tswast Nov 27, 2023
6c64ec5
revert presubmit changes
tswast Nov 27, 2023
e37571c
fix minimum sqlglot
tswast Nov 27, 2023
fad36a8
fix custom op
tswast Nov 28, 2023
5770502
Merge remote-tracking branch 'origin/main' into b301459557-ibis-prere…
tswast Dec 14, 2023
c318b18
hack InMemoryTable formatter back in
tswast Dec 14, 2023
d3304b2
use ops module to avoid breaking changes if ops move around
tswast Dec 15, 2023
33bd2e0
workaround nullscalar issue
tswast Dec 15, 2023
c373dc0
update usage of percent_rank to explicitly order by the value
tswast Dec 15, 2023
41364b5
disable ibis prerelease tests for now
tswast Dec 15, 2023
7a8784c
fix unit_prerelease
tswast Dec 15, 2023
7b304a9
refactor: use ibis UDF functionality for remote_function
tswast Dec 15, 2023
eb5fcbc
Merge branch 'main' into tswast-ibis-udf
tswast Dec 15, 2023
5390c91
Merge branch 'main' into tswast-ibis-udf
tswast Dec 19, 2023
7393b16
Merge remote-tracking branch 'origin/main' into tswast-ibis-udf
tswast Dec 21, 2023
b09c549
fix tests except for one remaining issue with deferred ordering id
tswast Dec 21, 2023
cdc17a3
Merge remote-tracking branch 'origin/main' into tswast-ibis-udf
tswast Jan 2, 2024
a142b29
use unordered mode with pandas backend in unit tests
tswast Jan 2, 2024
aaeabd4
fix constraints
tswast Jan 2, 2024
c4ad90f
Merge remote-tracking branch 'origin/main' into tswast-ibis-udf
tswast Feb 26, 2024
63cc0f3
try ibis 8.0.0
tswast Feb 26, 2024
c434824
fix unit tests
tswast Feb 26, 2024
2bd2573
fix tests
tswast Feb 26, 2024
16415c2
Merge remote-tracking branch 'origin/main' into tswast-ibis-udf
tswast Feb 26, 2024
24038f1
fix unit_prerelease tests
tswast Feb 27, 2024
56ca094
fix remote function tests
tswast Feb 27, 2024
d4e8d51
fix nlargest/nsmallest
tswast Feb 27, 2024
a180d8c
Merge remote-tracking branch 'origin/main' into tswast-ibis-udf
tswast Feb 27, 2024
d2de4d7
Merge branch 'main' into tswast-ibis-udf
tswast Feb 27, 2024
54df52b
synchronize max sqlglot with ibis
tswast Feb 27, 2024
cf0ee34
Merge branch 'main' into tswast-ibis-udf
tswast Feb 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bigframes/core/compile/aggregate_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def _(
op: agg_ops.RankOp, column: ibis_types.Column, window=None
) -> ibis_types.IntegerValue:
# Ibis produces 0-based ranks, while pandas creates 1-based ranks
return _apply_window_if_present(column.rank(), window) + 1
return _apply_window_if_present(ibis.rank(), window) + 1


@compile_unary_agg.register
Expand Down
15 changes: 6 additions & 9 deletions bigframes/core/compile/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,17 +1099,14 @@ def _to_ibis_expr(
if not columns:
return ibis.memtable([])

# Make sure we don't have any unbound (deferred) columns.
table = self._table.select(columns)

# Make sure all dtypes are the "canonical" ones for BigFrames. This is
# important for operations like UNION where the schema must match.
table = self._table.select(
bigframes.dtypes.ibis_value_to_canonical_type(
column.resolve(self._table)
# TODO(https://github.com/ibis-project/ibis/issues/7613): use
# public API to refer to Deferred type.
if isinstance(column, ibis.common.deferred.Deferred)
else column
)
for column in columns
table = table.select(
bigframes.dtypes.ibis_value_to_canonical_type(table[column])
for column in table.columns
)
base_table = table
if self._reduced_predicate is not None:
Expand Down
4 changes: 2 additions & 2 deletions bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import geopandas as gpd # type: ignore
import google.cloud.bigquery as bigquery
import ibis
from ibis.backends.bigquery.datatypes import BigQueryType
import ibis.expr.datatypes as ibis_dtypes
from ibis.expr.datatypes.core import dtype as python_type_to_bigquery_type
import ibis.expr.types as ibis_types
Expand All @@ -33,6 +32,7 @@

import bigframes.constants as constants
import third_party.bigframes_vendored.google_cloud_bigquery._pandas_helpers as gcb3p_pandas_helpers
import third_party.bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes
import third_party.bigframes_vendored.ibis.expr.operations as vendored_ibis_ops

# Type hints for Pandas dtypes supported by BigQuery DataFrame
Expand Down Expand Up @@ -643,4 +643,4 @@ def ibis_type_from_python_type(t: type) -> ibis_dtypes.DataType:
def ibis_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> ibis_dtypes.DataType:
if tk not in SUPPORTED_IO_BIGQUERY_TYPEKINDS:
raise UnsupportedTypeError(tk, SUPPORTED_IO_BIGQUERY_TYPEKINDS)
return BigQueryType.to_ibis(tk)
return third_party_ibis_bqtypes.BigQueryType.to_ibis(tk)
71 changes: 31 additions & 40 deletions bigframes/functions/remote_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

from __future__ import annotations

import functools
import hashlib
import inspect
import logging
Expand All @@ -28,6 +27,7 @@
import textwrap
from typing import List, NamedTuple, Optional, Sequence, TYPE_CHECKING

import ibis
import requests

if TYPE_CHECKING:
Expand All @@ -43,15 +43,12 @@
resourcemanager_v3,
)
import google.iam.v1
from ibis.backends.bigquery.compiler import compiles
from ibis.backends.bigquery.datatypes import BigQueryType
from ibis.expr.datatypes.core import DataType as IbisDataType
import ibis.expr.operations as ops
import ibis.expr.rules as rlz

from bigframes import clients
import bigframes.constants as constants
import bigframes.dtypes
import third_party.bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -173,12 +170,14 @@ def create_bq_remote_function(
# Create BQ function
# https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_remote_function_2
bq_function_args = []
bq_function_return_type = BigQueryType.from_ibis(output_type)
bq_function_return_type = third_party_ibis_bqtypes.BigQueryType.from_ibis(
output_type
)

# We are expecting the input type annotations to be 1:1 with the input args
for idx, name in enumerate(input_args):
bq_function_args.append(
f"{name} {BigQueryType.from_ibis(input_types[idx])}"
f"{name} {third_party_ibis_bqtypes.BigQueryType.from_ibis(input_types[idx])}"
)
create_function_ddl = f"""
CREATE OR REPLACE FUNCTION `{self._gcp_project_id}.{self._bq_dataset}`.{bq_function_name}({','.join(bq_function_args)})
Expand Down Expand Up @@ -515,33 +514,10 @@ def get_remote_function_specs(self, remote_function_name):
return (http_endpoint, bq_connection)


def remote_function_node(
    routine_ref: bigquery.RoutineReference, ibis_signature: IbisSignature
):
    """Build a callable that produces Ibis expressions for a remote function.

    A new ``ops.ValueOp`` subclass is created dynamically. Its class name is
    the query-ready string form of ``routine_ref`` and it has one attribute
    per parameter in ``ibis_signature``. A BigQuery compilation rule is
    registered for that class which renders ``<class name>(<translated args>)``.

    Args:
        routine_ref: Reference to the BigQuery routine to call.
        ibis_signature: Supplies ``parameter_names``, ``input_types`` and
            ``output_type`` for the generated node.

    Returns:
        A function that instantiates the node with the given arguments and
        returns it as an expression. The function carries a
        ``bigframes_remote_function`` attribute holding ``str(routine_ref)``.
    """
    attrs = {}
    for param_name, param_type in zip(
        ibis_signature.parameter_names, ibis_signature.input_types
    ):
        # "ANY TYPE" parameters are left unconstrained (ValueOf(None)).
        constraint = None if param_type == "ANY TYPE" else param_type
        attrs[param_name] = rlz.ValueOf(constraint)

    attrs["dtype"] = ibis_signature.output_type  # type: ignore
    attrs["shape"] = rlz.shape_like("args")

    node_name = routine_ref_to_string_for_query(routine_ref)
    node = type(node_name, (ops.ValueOp,), attrs)  # type: ignore

    @compiles(node)
    def compile_node(t, op):
        translated_args = ", ".join(map(t.translate, op.args))
        return "{}({})".format(node.__name__, translated_args)

    def f(*args, **kwargs):
        return node(*args, **kwargs).to_expr()

    f.bigframes_remote_function = str(routine_ref)  # type: ignore

    return f
class UnsupportedTypeError(ValueError):
    """ValueError that records an offending type and the supported types.

    Attributes are set directly from the constructor arguments:
    ``type`` holds ``type_`` and ``supported_types`` holds the collection
    passed by the caller.
    """

    def __init__(self, type_, supported_types):
        # The base __init__ is intentionally not invoked with a custom
        # message; ``args`` stays as the two positional arguments.
        self.type, self.supported_types = type_, supported_types


def ibis_signature_from_python_signature(
Expand Down Expand Up @@ -831,14 +807,16 @@ def wrapper(f):
packages,
)

node = remote_function_node(dataset_ref.routine(rf_name), ibis_signature)

node = functools.wraps(f)(node)
node.__signature__ = signature
node = ibis.udf.scalar.builtin(
f,
name=rf_name,
schema=f"{dataset_ref.project}.{dataset_ref.dataset_id}",
signature=(ibis_signature.input_types, ibis_signature.output_type),
)
node.bigframes_cloud_function = (
remote_function_client.get_cloud_function_fully_qualified_name(cf_name)
)

node.bigframes_remote_function = str(dataset_ref.routine(rf_name)) # type: ignore
return node

return wrapper
Expand Down Expand Up @@ -888,4 +866,17 @@ def read_gbq_function(
f"{constants.FEEDBACK_LINK}"
)

return remote_function_node(routine_ref, ibis_signature)
# The name "args" conflicts with the Ibis operator, so we use
# non-standard names for the arguments here.
def node(*ignored_args, **ignored_kwargs):
    # Placeholder callable: it exists only to be wrapped by the Ibis builtin
    # UDF machinery below and does no work of its own.
    pass

# A formatted string literal is never treated as a docstring, so the
# description has to be attached explicitly for it to show up in __doc__.
node.__doc__ = f"Remote function {str(routine_ref)}."
node.__name__ = routine_ref.routine_id
node = ibis.udf.scalar.builtin(
node,
name=routine_ref.routine_id,
schema=f"{routine_ref.project}.{routine_ref.dataset_id}",
signature=(ibis_signature.input_types, ibis_signature.output_type),
)
node.bigframes_remote_function = str(routine_ref) # type: ignore
return node
6 changes: 3 additions & 3 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,12 +565,12 @@ def prerelease(session: nox.sessions.Session, tests_path):
# session.install(
# "--upgrade",
# "-e", # Use -e so that py.typed file is included.
# "git+https://github.com/ibis-project/ibis.git@7.x.x#egg=ibis-framework",
# "git+https://github.com/ibis-project/ibis.git#egg=ibis-framework",
# )
session.install(
"--upgrade",
# "--pre",
"ibis-framework>=7.1.0,<7.2.0dev",
"--pre",
"ibis-framework>=8.0.0,<9.0.0dev",
)
already_installed.add("ibis-framework")

Expand Down
5 changes: 2 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@
"google-cloud-iam >=2.12.1",
"google-cloud-resource-manager >=1.10.3",
"google-cloud-storage >=2.0.0",
# TODO: Relax upper bound once we have fixed unit tests with 7.2.0.
"ibis-framework[bigquery] >=7.1.0,<7.2.0dev",
"ibis-framework[bigquery] >=8.0.0,<9.0.0dev",
# TODO: Relax upper bound once we have fixed `system_prerelease` tests.
"pandas >=1.5.0,<2.1.4",
"pydata-google-auth >=1.8.2",
Expand All @@ -55,7 +54,7 @@
# Keep sqlglot versions in sync with ibis-framework. This avoids problems
# where the incorrect version of sqlglot is installed, such as
# https://github.com/googleapis/python-bigquery-dataframes/issues/315
"sqlglot >=19.9.0,<20",
"sqlglot >=20.8.0,<=20.11",
"tabulate >= 0.9",
"ipywidgets >=7.7.1",
"humanize >= 4.6.0",
Expand Down
4 changes: 2 additions & 2 deletions testing/constraints-3.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ google-cloud-bigquery-connection==1.12.0
google-cloud-iam==2.12.1
google-cloud-resource-manager==1.10.3
google-cloud-storage==2.0.0
ibis-framework==7.1.0
ibis-framework==8.0.0
pandas==1.5.0
pydata-google-auth==1.8.2
requests==2.27.1
scikit-learn==1.2.2
sqlalchemy==1.4
sqlglot==19.9.0
sqlglot==20.8.0
tabulate==0.9
ipywidgets==7.7.1
humanize==4.6.0
Expand Down
10 changes: 4 additions & 6 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,13 @@ def test_tail_with_custom_column_labels(scalars_df_index, scalars_pandas_df_inde
],
)
def test_df_nlargest(scalars_df_index, scalars_pandas_df_index, keep):
bf_result = scalars_df_index.nlargest(
3, ["bool_col", "int64_too"], keep=keep
).to_pandas()
bf_result = scalars_df_index.nlargest(3, ["bool_col", "int64_too"], keep=keep)
pd_result = scalars_pandas_df_index.nlargest(
3, ["bool_col", "int64_too"], keep=keep
)

pd.testing.assert_frame_equal(
bf_result,
bf_result.to_pandas(),
pd_result,
)

Expand All @@ -179,11 +177,11 @@ def test_df_nlargest(scalars_df_index, scalars_pandas_df_index, keep):
],
)
def test_df_nsmallest(scalars_df_index, scalars_pandas_df_index, keep):
bf_result = scalars_df_index.nsmallest(6, ["bool_col"], keep=keep).to_pandas()
bf_result = scalars_df_index.nsmallest(6, ["bool_col"], keep=keep)
pd_result = scalars_pandas_df_index.nsmallest(6, ["bool_col"], keep=keep)

pd.testing.assert_frame_equal(
bf_result,
bf_result.to_pandas(),
pd_result,
)

Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_arrayvalues_to_ibis_expr_with_concat():
total_ordering_columns=["col1"],
)
expr = value.concat([value])
actual = expr._compile_ordered()._to_ibis_expr(ordering_mode="unordered")
actual = expr._compile_unordered()._to_ibis_expr()
assert len(actual.columns) == 3
# TODO(ashleyxu, b/299631930): test out the union expression
assert actual.columns[0] == "column_0"
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_remote_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.backends.bigquery import datatypes as bq_types
from ibis.expr import datatypes as ibis_types

import bigframes.dtypes
import third_party.bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes


def test_supported_types_correspond():
Expand All @@ -24,7 +24,7 @@ def test_supported_types_correspond():
ibis_types.dtype(t) for t in bigframes.dtypes.SUPPORTED_IO_PYTHON_TYPES
}
ibis_types_from_bigquery = {
bq_types.BigQueryType.to_ibis(tk)
third_party_ibis_bqtypes.BigQueryType.to_ibis(tk)
for tk in bigframes.dtypes.SUPPORTED_IO_BIGQUERY_TYPEKINDS
}

Expand Down
Loading