From 9d318653c001287bcc8ae9d8e09d0187413cbed6 Mon Sep 17 00:00:00 2001
From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com>
Date: Tue, 27 Feb 2024 07:30:58 -0800
Subject: [PATCH] feat: Add ml.metrics.pairwise.manhattan_distance (#392)

---
 bigframes/ml/metrics/pairwise.py               | 16 ++++++++++++++++
 tests/system/small/ml/test_metrics_pairwise.py | 14 ++++++++++++++
 .../sklearn/metrics/pairwise.py                | 15 +++++++++++++++
 3 files changed, 45 insertions(+)

diff --git a/bigframes/ml/metrics/pairwise.py b/bigframes/ml/metrics/pairwise.py
index 35b64c7850..9ebea4ef42 100644
--- a/bigframes/ml/metrics/pairwise.py
+++ b/bigframes/ml/metrics/pairwise.py
@@ -34,3 +34,19 @@ def paired_cosine_distances(
 paired_cosine_distances.__doc__ = inspect.getdoc(
     vendored_metrics_pairwise.paired_cosine_distances
 )
+
+
+def paired_manhattan_distance(
+    X: Union[bpd.DataFrame, bpd.Series], Y: Union[bpd.DataFrame, bpd.Series]
+) -> bpd.DataFrame:
+    X, Y = utils.convert_to_dataframe(X, Y)
+    if len(X.columns) != 1 or len(Y.columns) != 1:
+        raise ValueError("Inputs X and Y can only contain 1 column.")
+
+    base_bqml = core.BaseBqml(session=X._session)
+    return base_bqml.distance(X, Y, type="MANHATTAN", name="manhattan_distance")
+
+
+paired_manhattan_distance.__doc__ = inspect.getdoc(
+    vendored_metrics_pairwise.paired_manhattan_distance
+)
diff --git a/tests/system/small/ml/test_metrics_pairwise.py b/tests/system/small/ml/test_metrics_pairwise.py
index 47bd1e18d0..e2aee971ee 100644
--- a/tests/system/small/ml/test_metrics_pairwise.py
+++ b/tests/system/small/ml/test_metrics_pairwise.py
@@ -33,3 +33,17 @@ def test_paired_cosine_distances():
     pd.testing.assert_frame_equal(
         result.to_pandas(), expected_pd_df, check_dtype=False, check_index_type=False
     )
+
+
+def test_paired_manhattan_distance():
+    x_col = [np.array([4.1, 0.5, 1.0])]
+    y_col = [np.array([3.0, 0.0, 2.5])]
+    X = bpd.read_pandas(pd.DataFrame({"X": x_col}))
+    Y = bpd.read_pandas(pd.DataFrame({"Y": y_col}))
+
+    result = metrics.pairwise.paired_manhattan_distance(X, Y)
+    expected_pd_df = pd.DataFrame({"X": x_col, "Y": y_col, "manhattan_distance": [3.1]})
+
+    pd.testing.assert_frame_equal(
+        result.to_pandas(), expected_pd_df, check_dtype=False, check_index_type=False
+    )
diff --git a/third_party/bigframes_vendored/sklearn/metrics/pairwise.py b/third_party/bigframes_vendored/sklearn/metrics/pairwise.py
index c309b08d88..5791d850ff 100644
--- a/third_party/bigframes_vendored/sklearn/metrics/pairwise.py
+++ b/third_party/bigframes_vendored/sklearn/metrics/pairwise.py
@@ -24,3 +24,18 @@ def paired_cosine_distances(X, Y) -> bpd.DataFrame:
         bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and cosine_distance
     """
     raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+
+def paired_manhattan_distance(X, Y) -> bpd.DataFrame:
+    """Compute the L1 distances between the vectors in X and Y.
+
+    Args:
+        X (Series or single column DataFrame of array of numeric type):
+            Input data.
+        Y (Series or single column DataFrame of array of numeric type):
+            Input data. X and Y are mapped by indexes, must have the same index.
+
+    Returns:
+        bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and manhattan_distance
+    """
+    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)