From 695a16a79c2664f8f63cdc82952cfcab73d27b5e Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Tue, 27 Feb 2024 18:49:41 +0000 Subject: [PATCH] fix: fix the docs link for metrics.pairwise --- bigframes/ml/metrics/pairwise.py | 16 ++++++++++++++++ docs/reference/bigframes.ml/metrics.pairwise.rst | 7 +++++++ tests/system/small/ml/test_metrics_pairwise.py | 16 ++++++++++++++++ .../sklearn/metrics/pairwise.py | 15 +++++++++++++++ 4 files changed, 54 insertions(+) create mode 100644 docs/reference/bigframes.ml/metrics.pairwise.rst diff --git a/bigframes/ml/metrics/pairwise.py b/bigframes/ml/metrics/pairwise.py index 9ebea4ef42..ef2c08d471 100644 --- a/bigframes/ml/metrics/pairwise.py +++ b/bigframes/ml/metrics/pairwise.py @@ -50,3 +50,19 @@ def paired_manhattan_distance( paired_manhattan_distance.__doc__ = inspect.getdoc( vendored_metrics_pairwise.paired_manhattan_distance ) + + +def paired_euclidean_distances( + X: Union[bpd.DataFrame, bpd.Series], Y: Union[bpd.DataFrame, bpd.Series] +) -> bpd.DataFrame: + X, Y = utils.convert_to_dataframe(X, Y) + if len(X.columns) != 1 or len(Y.columns) != 1: + raise ValueError("Inputs X and Y can only contain 1 column.") + + base_bqml = core.BaseBqml(session=X._session) + return base_bqml.distance(X, Y, type="EUCLIDEAN", name="euclidean_distance") + + +paired_euclidean_distances.__doc__ = inspect.getdoc( + vendored_metrics_pairwise.paired_euclidean_distances +) diff --git a/docs/reference/bigframes.ml/metrics.pairwise.rst b/docs/reference/bigframes.ml/metrics.pairwise.rst new file mode 100644 index 0000000000..c20772ef07 --- /dev/null +++ b/docs/reference/bigframes.ml/metrics.pairwise.rst @@ -0,0 +1,7 @@ +bigframes.ml.metrics.pairwise +============================= + +.. automodule:: bigframes.ml.metrics.pairwise + :members: + :inherited-members: + :undoc-members: diff --git a/tests/system/small/ml/test_metrics_pairwise.py b/tests/system/small/ml/test_metrics_pairwise.py index e2aee971ee..717f32667f 100644 --- a/tests/system/small/ml/test_metrics_pairwise.py +++ b/tests/system/small/ml/test_metrics_pairwise.py @@ -47,3 +47,19 @@ def test_paired_manhattan_distance(): pd.testing.assert_frame_equal( result.to_pandas(), expected_pd_df, check_dtype=False, check_index_type=False ) + + +def test_paired_euclidean_distances(): + x_col = [np.array([4.1, 0.5, 1.0])] + y_col = [np.array([3.0, 0.0, 2.5])] + X = bpd.read_pandas(pd.DataFrame({"X": x_col})) + Y = bpd.read_pandas(pd.DataFrame({"Y": y_col})) + + result = metrics.pairwise.paired_euclidean_distances(X, Y) + expected_pd_df = pd.DataFrame( + {"X": x_col, "Y": y_col, "euclidean_distance": [1.926136]} + ) + + pd.testing.assert_frame_equal( + result.to_pandas(), expected_pd_df, check_dtype=False, check_index_type=False + ) diff --git a/third_party/bigframes_vendored/sklearn/metrics/pairwise.py b/third_party/bigframes_vendored/sklearn/metrics/pairwise.py index 5791d850ff..be3d6753a7 100644 --- a/third_party/bigframes_vendored/sklearn/metrics/pairwise.py +++ b/third_party/bigframes_vendored/sklearn/metrics/pairwise.py @@ -39,3 +39,18 @@ def paired_manhattan_distance(X, Y) -> bpd.DataFrame: bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and manhattan_distance """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + +def paired_euclidean_distances(X, Y) -> bpd.DataFrame: + """Compute the paired euclidean distances between X and Y. + + Args: + X (Series or single column DataFrame of array of numeric type): + Input data. + Y (Series or single column DataFrame of array of numeric type): + Input data. X and Y are mapped by indexes, must have the same index. + + Returns: + bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and euclidean_distance + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)