Skip to content

Commit

Permalink
chore: de-flake sem_cluster test by relaxing the passing requirement (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
sycai authored Dec 11, 2024
1 parent f200f68 commit 93ce2ac
Showing 1 changed file with 16 additions and 4 deletions.
20 changes: 16 additions & 4 deletions tests/system/large/operations/test_semantics.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,25 +165,37 @@ def test_agg_invalid_cluster_column_raise_error(gemini_flash_model, cluster_colu
[
pytest.param(1, id="one", marks=pytest.mark.xfail(raises=ValueError)),
pytest.param(2, id="two"),
pytest.param(4, id="four"),
],
)
def test_cluster_by(session, text_embedding_generator, n_clusters):
    """Check that ``semantics.cluster_by`` adds a cluster-id column.

    A single-column frame of item names is clustered on its ``"Item"``
    column using the supplied embedding generator, and the cluster ids
    are written to ``"cluster id"``.

    The test only requires that the number of distinct cluster ids does
    not exceed ``n_clusters``; an exact match is deliberately not
    asserted because the clustering is not deterministic.
    """
    # Semantic operators are gated behind an experiment flag.
    bigframes.options.experiments.semantic_operators = True
    df = dataframe.DataFrame(
        (
            {
                "Item": [
                    "Orange",
                    "Cantaloupe",
                    "Watermelon",
                    "Chicken",
                    "Duck",
                    "Hen",
                    "Rooster",
                ]
            }
        ),
        session=session,
    )
    output_column = "cluster id"
    result = df.semantics.cluster_by(
        "Item",
        output_column,
        text_embedding_generator,
        n_clusters=n_clusters,
    )

    assert output_column in result
    # In rare cases, it's possible to have fewer than K clusters due to randomness.
    assert len(result[output_column].unique()) <= n_clusters


def test_cluster_by_invalid_column(session, text_embedding_generator):
Expand Down

0 comments on commit 93ce2ac

Please sign in to comment.