Skip to content

Commit

Permalink
Merge branch 'main' into shuowei-open-bq-studio
Browse files Browse the repository at this point in the history
  • Loading branch information
shuoweil authored Dec 16, 2024
2 parents 34d74d0 + 4c3548f commit f5bdafb
Show file tree
Hide file tree
Showing 9 changed files with 265 additions and 0 deletions.
11 changes: 11 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,17 @@ def normalize_op_impl(x: ibis_types.Value):
return result.cast(result_type)


# Geo Ops
@scalar_op_compiler.register_unary_op(ops.geo_x_op)
def geo_x_op_impl(x: ibis_types.Value):
    """Compile ``ops.geo_x_op`` by calling ``.x()`` on the operand."""
    # typing.cast only narrows the static type for the type checker; the
    # value itself is passed through unchanged at runtime.
    geo_value = typing.cast(ibis_types.GeoSpatialValue, x)
    return geo_value.x()


@scalar_op_compiler.register_unary_op(ops.geo_y_op)
def geo_y_op_impl(x: ibis_types.Value):
    """Compile ``ops.geo_y_op`` by calling ``.y()`` on the operand."""
    # typing.cast only narrows the static type for the type checker; the
    # value itself is passed through unchanged at runtime.
    geo_value = typing.cast(ibis_types.GeoSpatialValue, x)
    return geo_value.y()


# Parameterized ops
@scalar_op_compiler.register_unary_op(ops.StructFieldOp, pass_op=True)
def struct_field_op_impl(x: ibis_types.Value, op: ops.StructFieldOp):
Expand Down
4 changes: 4 additions & 0 deletions bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@ def is_time_like(type_: ExpressionType) -> bool:
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)


def is_geo_like(type_: ExpressionType) -> bool:
    """Return True when ``type_`` is a geo dtype (currently only ``GEO_DTYPE``)."""
    geo_dtypes = (GEO_DTYPE,)
    return type_ in geo_dtypes


def is_binary_like(type_: ExpressionType) -> bool:
    """Return True when ``type_`` is ``BOOL_DTYPE``, ``BYTES_DTYPE`` or ``INT_DTYPE``."""
    binary_like_dtypes = (BOOL_DTYPE, BYTES_DTYPE, INT_DTYPE)
    return type_ in binary_like_dtypes

Expand Down
13 changes: 13 additions & 0 deletions bigframes/geopandas/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import bigframes_vendored.geopandas.geoseries as vendored_geoseries
import geopandas.array # type: ignore

import bigframes.operations as ops
import bigframes.series


Expand All @@ -26,3 +27,15 @@ def __init__(self, data=None, index=None, **kwargs):
super().__init__(
data=data, index=index, dtype=geopandas.array.GeometryDtype(), **kwargs
)

@property
def x(self) -> bigframes.series.Series:
    # Apply the geo_x unary op to this series, then clear the name on the
    # result before handing it back.
    result = self._apply_unary_op(ops.geo_x_op)
    result.name = None
    return result

@property
def y(self) -> bigframes.series.Series:
    # Apply the geo_y unary op to this series, then clear the name on the
    # result before handing it back.
    result = self._apply_unary_op(ops.geo_y_op)
    result.name = None
    return result
13 changes: 13 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,19 @@ def create_binary_op(
arctanh_op = create_unary_op(
name="arctanh", type_signature=op_typing.UNARY_REAL_NUMERIC
)
## Geo Ops
# Both ops share the same type signature: a FixedOutputType built from the
# dtypes.is_geo_like predicate and dtypes.FLOAT_DTYPE, described as "geo-like".
# geo_x_op is compiled to a ``.x()`` call on the geospatial value.
geo_x_op = create_unary_op(
    name="geo_x",
    type_signature=op_typing.FixedOutputType(
        dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like"
    ),
)
# geo_y_op is compiled to a ``.y()`` call on the geospatial value.
geo_y_op = create_unary_op(
    name="geo_y",
    type_signature=op_typing.FixedOutputType(
        dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like"
    ),
)
## Numeric Ops
floor_op = create_unary_op(name="floor", type_signature=op_typing.UNARY_REAL_NUMERIC)
ceil_op = create_unary_op(name="ceil", type_signature=op_typing.UNARY_REAL_NUMERIC)
Expand Down
22 changes: 22 additions & 0 deletions tests/data/urban_areas.jsonl

Large diffs are not rendered by default.

72 changes: 72 additions & 0 deletions tests/data/urban_areas_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
[
{
"mode": "NULLABLE",
"name": "geo_id",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "urban_area_code",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "lsad_name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "area_lsad_code",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "mtfcc_feature_class_code",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "type",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "functional_status",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "area_land_meters",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
"name": "area_water_meters",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
"name": "internal_point_lon",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
"name": "internal_point_lat",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
"name": "internal_point_geom",
"type": "GEOGRAPHY"
},
{
"mode": "NULLABLE",
"name": "urban_area_geom",
"type": "GEOGRAPHY"
}
]
6 changes: 6 additions & 0 deletions tests/system/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ def load_test_data_tables(
("hockey_players", "hockey_players.json", "hockey_players.jsonl"),
("matrix_2by3", "matrix_2by3.json", "matrix_2by3.jsonl"),
("matrix_3by4", "matrix_3by4.json", "matrix_3by4.jsonl"),
("urban_areas", "urban_areas_schema.json", "urban_areas.jsonl"),
]:
test_data_hash = hashlib.md5()
_hash_digest_file(test_data_hash, DATA_DIR / schema_filename)
Expand Down Expand Up @@ -400,6 +401,11 @@ def penguins_table_id(test_data_tables) -> str:
return test_data_tables["penguins"]


@pytest.fixture(scope="session")
def urban_areas_table_id(test_data_tables) -> str:
    """Return the ``urban_areas`` entry of the loaded test data tables."""
    table_id = test_data_tables["urban_areas"]
    return table_id


@pytest.fixture(scope="session")
def time_series_table_id(test_data_tables) -> str:
    """Return the ``time_series`` entry of the loaded test data tables."""
    table_id = test_data_tables["time_series"]
    return table_id
Expand Down
63 changes: 63 additions & 0 deletions tests/system/small/geopandas/test_geoseries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import geopandas # type: ignore
import google.api_core.exceptions
import pandas as pd
import pytest

import bigframes.geopandas
import bigframes.series
from tests.system.utils import assert_series_equal


@pytest.fixture(scope="session")
def urban_areas_dfs(session, urban_areas_table_id):
    """Read the urban areas table and return it as a (bigframes, pandas) pair.

    The table is read with ``geo_id`` as the index column, and the pandas
    copy is produced by calling ``to_pandas()`` on that same frame.
    """
    bigframes_df = session.read_gbq(urban_areas_table_id, index_col="geo_id")
    pandas_df = bigframes_df.to_pandas()
    return bigframes_df, pandas_df


def test_geo_x(urban_areas_dfs):
    """``GeoSeries.x`` on ``internal_point_geom`` matches geopandas' ``x``."""
    bf_ua, pd_ua = urban_areas_dfs
    bf_points: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo
    pd_points: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"])

    actual = bf_points.x.to_pandas()
    # Cast the geopandas result to Float64Dtype before comparing it with the
    # bigframes result.
    expected = pd_points.x.astype(pd.Float64Dtype())

    assert_series_equal(expected, actual)


def test_geo_x_non_point(urban_areas_dfs):
    """Evaluating ``x`` on ``urban_area_geom`` raises BadRequest mentioning ST_X."""
    bf_ua = urban_areas_dfs[0]
    bf_area_geoms: bigframes.geopandas.GeoSeries = bf_ua["urban_area_geom"].geo
    expected_error = google.api_core.exceptions.BadRequest

    # The error is expected while the result is materialized with to_pandas().
    with pytest.raises(expected_error, match="ST_X"):
        bf_area_geoms.x.to_pandas()


def test_geo_y(urban_areas_dfs):
    """``GeoSeries.y`` on ``internal_point_geom`` matches geopandas' ``y``."""
    bf_ua, pd_ua = urban_areas_dfs
    bf_points: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo
    pd_points: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"])

    actual = bf_points.y.to_pandas()
    # Cast the geopandas result to Float64Dtype before comparing it with the
    # bigframes result.
    expected = pd_points.y.astype(pd.Float64Dtype())

    assert_series_equal(expected, actual)
61 changes: 61 additions & 0 deletions third_party/bigframes_vendored/geopandas/geoseries.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# contains code from https://github.com/geopandas/geopandas/blob/main/geopandas/geoseries.py
from __future__ import annotations

from typing import TYPE_CHECKING

from bigframes import constants

if TYPE_CHECKING:
import bigframes.series


class GeoSeries:
"""
Expand Down Expand Up @@ -28,3 +35,57 @@ class GeoSeries:
Additional arguments passed to the Series constructor,
e.g. ``name``.
"""

@property
def x(self) -> bigframes.series.Series:
    """Return the x location of point geometries in a GeoSeries.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None
        >>> import geopandas.array
        >>> import shapely
        >>> series = bpd.Series(
        ...     [shapely.Point(1, 2), shapely.Point(2, 3), shapely.Point(3, 4)],
        ...     dtype=geopandas.array.GeometryDtype()
        ... )
        >>> series.geo.x
        0    1.0
        1    2.0
        2    3.0
        dtype: Float64

    Returns:
        bigframes.series.Series:
            Return the x location (longitude) of point geometries.
    """
    # This vendored definition only carries the signature and documentation;
    # calling it directly always raises NotImplementedError.
    # NOTE(review): the system test test_geo_x_non_point expects a BadRequest
    # mentioning ST_X for a non-point column; consider documenting that here.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

@property
def y(self) -> bigframes.series.Series:
    """Return the y location of point geometries in a GeoSeries.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None
        >>> import geopandas.array
        >>> import shapely
        >>> series = bpd.Series(
        ...     [shapely.Point(1, 2), shapely.Point(2, 3), shapely.Point(3, 4)],
        ...     dtype=geopandas.array.GeometryDtype()
        ... )
        >>> series.geo.y
        0    2.0
        1    3.0
        2    4.0
        dtype: Float64

    Returns:
        bigframes.series.Series:
            Return the y location (latitude) of point geometries.
    """
    # This vendored definition only carries the signature and documentation;
    # calling it directly always raises NotImplementedError.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 comments on commit f5bdafb

Please sign in to comment.