Skip to content

Commit

Permalink
Merge branch 'main' into index_align
Browse files Browse the repository at this point in the history
  • Loading branch information
TrevorBergeron authored Apr 3, 2024
2 parents ed4eef0 + 1caac27 commit bf5f61a
Show file tree
Hide file tree
Showing 7 changed files with 554 additions and 563 deletions.
9 changes: 8 additions & 1 deletion bigframes/ml/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
_GEMINI_PRO_ENDPOINT = "gemini-pro"

_ML_GENERATE_TEXT_STATUS = "ml_generate_text_status"
_ML_EMBED_TEXT_STATUS = "ml_generate_embedding_status"
_ML_EMBED_TEXT_STATUS = "ml_embed_text_status"


@log_adapter.class_logger
Expand Down Expand Up @@ -390,6 +390,13 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame:
}

df = self._bqml_model.generate_embedding(X, options)
df = df.rename(
columns={
"ml_generate_embedding_result": "text_embedding",
"ml_generate_embedding_statistics": "statistics",
"ml_generate_embedding_status": _ML_EMBED_TEXT_STATUS,
}
)

if (df[_ML_EMBED_TEXT_STATUS] != "").any():
warnings.warn(
Expand Down
6 changes: 6 additions & 0 deletions bigframes/operations/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ def __init__(self, data, **kwargs) -> None:
f"Only support a single color string or a column name/posision. {constants.FEEDBACK_LINK}"
)

s = self.kwargs.get("s", None)
if self._is_sequence_arg(s):
raise NotImplementedError(
f"Only support a single color string or a column name/posision. {constants.FEEDBACK_LINK}"
)

def _compute_plot_data(self):
sample = self._compute_sample_data(self.data)

Expand Down
5 changes: 5 additions & 0 deletions docs/templates/toc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@
name: Indexers
- name: pandas
uid: bigframes.pandas
- items:
- name: Plotting
uid: bigframes.operations.plotting
- name: PlotAccessor
uid: bigframes.operations.plotting.PlotAccessor
- items:
- name: Series
uid: bigframes.series.Series
Expand Down
1,066 changes: 513 additions & 553 deletions notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions tests/system/small/ml/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,8 @@ def test_embedding_generator_predict_success(
):
df = palm2_embedding_generator_model.predict(llm_text_df).to_pandas()
assert df.shape == (3, 4)
assert "ml_generate_embedding_result" in df.columns
series = df["ml_generate_embedding_result"]
assert "text_embedding" in df.columns
series = df["text_embedding"]
value = series[0]
assert len(value) == 768

Expand All @@ -273,8 +273,8 @@ def test_embedding_generator_multilingual_predict_success(
):
df = palm2_embedding_generator_multilingual_model.predict(llm_text_df).to_pandas()
assert df.shape == (3, 4)
assert "ml_generate_embedding_result" in df.columns
series = df["ml_generate_embedding_result"]
assert "text_embedding" in df.columns
series = df["text_embedding"]
value = series[0]
assert len(value) == 768

Expand All @@ -285,8 +285,8 @@ def test_embedding_generator_predict_series_success(
):
df = palm2_embedding_generator_model.predict(llm_text_df["prompt"]).to_pandas()
assert df.shape == (3, 4)
assert "ml_generate_embedding_result" in df.columns
series = df["ml_generate_embedding_result"]
assert "text_embedding" in df.columns
series = df["text_embedding"]
value = series[0]
assert len(value) == 768

Expand Down
16 changes: 16 additions & 0 deletions tests/system/small/operations/test_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,22 @@ def test_scatter_args_c(c):
)


@pytest.mark.parametrize(
    "arg_name",
    [
        pytest.param("c", marks=pytest.mark.xfail(raises=NotImplementedError)),
        pytest.param("s", marks=pytest.mark.xfail(raises=NotImplementedError)),
    ],
)
def test_scatter_sequence_arg(arg_name):
    """Call ``plot.scatter`` with a list passed as the ``c`` or ``s`` argument.

    Each parameter is marked ``xfail(raises=NotImplementedError)``, so the
    scatter call is expected to raise ``NotImplementedError`` when the named
    argument receives a sequence value.

    Args:
        arg_name: Name of the scatter keyword argument ("c" or "s") that
            receives the sequence value.
    """
    frame = bpd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    # Build the keyword arguments up front so the parametrized argument name
    # can be injected next to the fixed x/y column selections.
    scatter_kwargs = {"x": "a", "y": "b", arg_name: [3, 3, 1]}
    frame.plot.scatter(**scatter_kwargs)


def test_sampling_plot_args_n():
df = bpd.DataFrame(np.arange(bf_mpl.DEFAULT_SAMPLING_N * 10), columns=["one"])
ax = df.plot.line()
Expand Down
3 changes: 0 additions & 3 deletions third_party/bigframes_vendored/pandas/plotting/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,6 @@ def scatter(
- A string with the name of the column to be used for marker's size.
- A single scalar so all points have the same size.
- A sequence of scalars, which will be used for each point's size
recursively. For instance, when passing [2,14] all points size
will be either 2 or 14, alternatively.
c (str, int or array-like, optional):
The color of each point. Possible values are:
Expand Down

0 comments on commit bf5f61a

Please sign in to comment.