Skip to content

Commit

Permalink
fix!: rename cosine_similarity to paired_cosine_distances (#393)
Browse files Browse the repository at this point in the history
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea.
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
  • Loading branch information
GarrettWu authored Feb 26, 2024
1 parent a0490a4 commit 81ece46
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 14 deletions.
8 changes: 5 additions & 3 deletions bigframes/ml/metrics/pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,17 @@
import third_party.bigframes_vendored.sklearn.metrics.pairwise as vendored_metrics_pairwise


def cosine_similarity(
def paired_cosine_distances(
X: Union[bpd.DataFrame, bpd.Series], Y: Union[bpd.DataFrame, bpd.Series]
) -> bpd.DataFrame:
X, Y = utils.convert_to_dataframe(X, Y)
if len(X.columns) != 1 or len(Y.columns) != 1:
raise ValueError("Inputs X and Y can only contain 1 column.")

base_bqml = core.BaseBqml(session=X._session)
return base_bqml.distance(X, Y, type="COSINE", name="cosine_similarity")
return base_bqml.distance(X, Y, type="COSINE", name="cosine_distance")


cosine_similarity.__doc__ = inspect.getdoc(vendored_metrics_pairwise.cosine_similarity)
paired_cosine_distances.__doc__ = inspect.getdoc(
vendored_metrics_pairwise.paired_cosine_distances
)
6 changes: 3 additions & 3 deletions tests/system/small/ml/test_metrics_pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@
import bigframes.pandas as bpd


def test_cosine_similarity():
def test_paired_cosine_distances():
x_col = [np.array([4.1, 0.5, 1.0])]
y_col = [np.array([3.0, 0.0, 2.5])]
X = bpd.read_pandas(pd.DataFrame({"X": x_col}))
Y = bpd.read_pandas(pd.DataFrame({"Y": y_col}))

result = metrics.pairwise.cosine_similarity(X, Y)
result = metrics.pairwise.paired_cosine_distances(X, Y)
expected_pd_df = pd.DataFrame(
{"X": x_col, "Y": y_col, "cosine_similarity": [0.108199]}
{"X": x_col, "Y": y_col, "cosine_distance": [0.108199]}
)

pd.testing.assert_frame_equal(
Expand Down
11 changes: 3 additions & 8 deletions third_party/bigframes_vendored/sklearn/metrics/pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,8 @@
import bigframes.pandas as bpd


def cosine_similarity(X, Y) -> bpd.DataFrame:
"""Compute cosine similarity between samples in X and Y.
Cosine similarity, or the cosine kernel, computes similarity as the
normalized dot product of X and Y:
K(X, Y) = <X, Y> / (||X||*||Y||)
def paired_cosine_distances(X, Y) -> bpd.DataFrame:
"""Compute the paired cosine distances between X and Y.
Args:
X (Series or single column DataFrame of array of numeric type):
Expand All @@ -26,6 +21,6 @@ def cosine_similarity(X, Y) -> bpd.DataFrame:
Input data. X and Y are mapped by indexes, must have the same index.
Returns:
bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and cosine_similarity
bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and cosine_distance
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 comments on commit 81ece46

Please sign in to comment.