Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add Gemini-pro-1.5 to GeminiTextGenerator Tuning and Support score() method in Gemini-pro-1.5 #1208

Merged
merged 23 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
97d9259
docs(bigquery): update minor parts in base.py
Dec 3, 2024
c9318d0
docs(bigquery): update minor changes for bigframes/ml/base.py
Dec 3, 2024
4f9370c
Merge branch 'shuowei-doc-update'
Dec 3, 2024
8d5e0ed
Merge branch 'shuowei-doc-update'
Dec 3, 2024
e5413a1
Merge branch 'shuowei-update-textembedding005'
Dec 3, 2024
7b65227
Merge branch 'shuowei-doc-update'
Dec 3, 2024
9d6376b
Merge branch 'shuowei-update-textembedding005'
Dec 3, 2024
f315f54
Merge branch 'shuowei-text-generator-tuning'
Dec 10, 2024
e9f28f4
feat: Update lUpdate GeminiTextGenerator Tuning and Support score() m…
Dec 11, 2024
5d2a807
feat: Update lUpdate GeminiTextGenerator Tuning and Support score() m…
Dec 11, 2024
dc765ec
Merge branch 'main' into shuowei-text-generator-tuning
shuoweil Dec 11, 2024
7a40315
Merge branch 'main' into shuowei-text-generator-tuning
shuoweil Dec 11, 2024
8be16d3
update testcase and docs for better clarification
Dec 11, 2024
8a39a02
update endpoint to corresponding endpoint for fine tuning.
Dec 12, 2024
361a734
Merge branch 'main' into shuowei-text-generator-tuning
shuoweil Dec 12, 2024
b213753
Merge branch 'main' into shuowei-text-generator-tuning
shuoweil Dec 12, 2024
9de2c0e
docs(bigquery): update minor parts in base.py
Dec 3, 2024
ed001b8
fix syntax issue
Dec 12, 2024
9928f10
Revert "docs(bigquery): update minor parts in base.py"
Dec 12, 2024
ba80d10
Merge branch 'main' into shuowei-text-generator-tuning
shuoweil Dec 12, 2024
241ae73
merge gemini_fine_tune_endpoints and gemini_score_endpoints together,…
Dec 13, 2024
205e173
merge genimi_fine_tune_endpoints and genimi_score_endpoints, since th…
Dec 13, 2024
6a44e7b
Revert "merge genimi_fine_tune_endpoints and genimi_score_endpoints, …
Dec 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 21 additions & 8 deletions bigframes/ml/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@
_GEMINI_1P5_PRO_FLASH_PREVIEW_ENDPOINT,
_GEMINI_2_FLASH_EXP_ENDPOINT,
)
# Model names accepted by the fit() (fine tuning) and score() checks in this
# module: both raise NotImplementedError when self.model_name is not listed here.
_GEMINI_FINE_TUNE_SCORE_ENDPOINTS = (
_GEMINI_PRO_ENDPOINT,
_GEMINI_1P5_PRO_002_ENDPOINT,
_GEMINI_1P5_FLASH_002_ENDPOINT,
)

_CLAUDE_3_SONNET_ENDPOINT = "claude-3-sonnet"
_CLAUDE_3_HAIKU_ENDPOINT = "claude-3-haiku"
Expand Down Expand Up @@ -890,7 +895,8 @@ def fit(
X: utils.ArrayType,
y: utils.ArrayType,
) -> GeminiTextGenerator:
"""Fine tune GeminiTextGenerator model. Only support "gemini-pro" model for now.
"""Fine tune GeminiTextGenerator model. Only support "gemini-pro", "gemini-1.5-pro-002",
"gemini-1.5-flash-002" models for now.

.. note::

Expand All @@ -908,13 +914,18 @@ def fit(
Returns:
GeminiTextGenerator: Fitted estimator.
"""
if self._bqml_model.model_name.startswith("gemini-1.5"):
raise NotImplementedError("Fit is not supported for gemini-1.5 model.")
if self.model_name not in _GEMINI_FINE_TUNE_SCORE_ENDPOINTS:
raise NotImplementedError(
"fit() only supports gemini-pro, \
gemini-1.5-pro-002, or gemini-1.5-flash-002 model."
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't leave a comment on the unchanged lines. Line 905 needs to be updated to use the endpoint that corresponds to each model. (Still use gemini-1.0-pro-002 for gemini-pro, though issuing a warning in that case may be more appropriate.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


X, y = utils.batch_convert_to_dataframe(X, y)

options = self._bqml_options
options["endpoint"] = "gemini-1.0-pro-002"
options["endpoint"] = (
"gemini-1.0-pro-002" if self.model_name == "gemini-pro" else self.model_name
)
options["prompt_col"] = X.columns.tolist()[0]

self._bqml_model = self._bqml_model_factory.create_llm_remote_model(
Expand Down Expand Up @@ -1025,7 +1036,7 @@ def score(
"text_generation", "classification", "summarization", "question_answering"
] = "text_generation",
) -> bpd.DataFrame:
"""Calculate evaluation metrics of the model. Only "gemini-pro" model is supported for now.
"""Calculate evaluation metrics of the model. Only support "gemini-pro" and "gemini-1.5-pro-002", and "gemini-1.5-flash-002".

.. note::

Expand Down Expand Up @@ -1057,9 +1068,11 @@ def score(
if not self._bqml_model:
raise RuntimeError("A model must be fitted before score")

# TODO(ashleyxu): Support gemini-1.5 when the rollout is ready. b/344891364.
if self._bqml_model.model_name.startswith("gemini-1.5"):
raise NotImplementedError("Score is not supported for gemini-1.5 model.")
if self.model_name not in _GEMINI_FINE_TUNE_SCORE_ENDPOINTS:
raise NotImplementedError(
"score() only supports gemini-pro \
, gemini-1.5-pro-002, and gemini-1.5-flash-2 model."
)

X, y = utils.batch_convert_to_dataframe(X, y, session=self._bqml_model.session)

Expand Down
14 changes: 10 additions & 4 deletions tests/system/load/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,19 @@ def llm_remote_text_df(session, llm_remote_text_pandas_df):
return session.read_pandas(llm_remote_text_pandas_df)


@pytest.mark.flaky(retries=2)
@pytest.mark.parametrize(
"model_name",
(
"gemini-pro",
"gemini-1.5-pro-002",
"gemini-1.5-flash-002",
),
)
def test_llm_gemini_configure_fit(
session, llm_fine_tune_df_default_index, llm_remote_text_df
session, model_name, llm_fine_tune_df_default_index, llm_remote_text_df
):
model = llm.GeminiTextGenerator(
session=session, model_name="gemini-pro", max_iterations=1
session=session, model_name=model_name, max_iterations=1
)

X_train = llm_fine_tune_df_default_index[["prompt"]]
Expand All @@ -69,7 +76,6 @@ def test_llm_gemini_configure_fit(
],
index=3,
)
# TODO(ashleyxu b/335492787): After bqml rolled out version control: save, load, check parameters to ensure configuration was kept


@pytest.mark.flaky(retries=2)
Expand Down
26 changes: 20 additions & 6 deletions tests/system/small/ml/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,9 +417,16 @@ def test_llm_palm_score_params(llm_fine_tune_df_default_index):
)


@pytest.mark.flaky(retries=2)
def test_llm_gemini_pro_score(llm_fine_tune_df_default_index):
model = llm.GeminiTextGenerator(model_name="gemini-pro")
@pytest.mark.parametrize(
"model_name",
(
"gemini-pro",
"gemini-1.5-pro-002",
"gemini-1.5-flash-002",
),
)
def test_llm_gemini_score(llm_fine_tune_df_default_index, model_name):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add the same model_name parametrization to the next test, "test_llm_gemini_pro_score_params", as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

model = llm.GeminiTextGenerator(model_name=model_name)

# Check score to ensure the model was fitted
score_result = model.score(
Expand All @@ -439,9 +446,16 @@ def test_llm_gemini_pro_score(llm_fine_tune_df_default_index):
)


@pytest.mark.flaky(retries=2)
def test_llm_gemini_pro_score_params(llm_fine_tune_df_default_index):
model = llm.GeminiTextGenerator(model_name="gemini-pro")
@pytest.mark.parametrize(
"model_name",
(
"gemini-pro",
"gemini-1.5-pro-002",
"gemini-1.5-flash-002",
),
)
def test_llm_gemini_pro_score_params(llm_fine_tune_df_default_index, model_name):
model = llm.GeminiTextGenerator(model_name=model_name)

# Check score to ensure the model was fitted
score_result = model.score(
Expand Down
Loading