diff --git a/notebooks/generative_ai/bq_dataframes_llm_gemini_2.ipynb b/notebooks/generative_ai/bq_dataframes_llm_gemini_2.ipynb new file mode 100644 index 0000000000..d458a0f53b --- /dev/null +++ b/notebooks/generative_ai/bq_dataframes_llm_gemini_2.ipynb @@ -0,0 +1,377 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2024 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# BigFrames Gemini 2.0 Text Generation Simple Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: This feature is only available in bigframes >= 1.29.0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import bigframes.pandas as bpd\n", + "from bigframes.ml import llm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Gemini 2.0 experimental Model with model_name as \"gemini-2.0-flash-exp\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/ml/llm.py:803: PreviewWarning: Model gemini-2.0-flash-exp is subject to the \"Pre-GA Offerings Terms\" in the General Service Terms section of the\n", + " Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available \"as is\"\n", + " and might have limited support. For more information, see the launch stage descriptions\n", + " (https://cloud.google.com/products#product-launch-stages).\n", + " warnings.warn(\n", + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/pandas/__init__.py:435: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", + " return global_session.get_global_session()\n" + ] + }, + { + "data": { + "text/html": [ + "Query job f673a2ea-023e-4771-84a2-fb81f808fa1b is DONE. 0 Bytes processed. 
Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "model = llm.GeminiTextGenerator(model_name=\"gemini-2.0-flash-exp\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a simple DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 2276ea5b-2e08-4ed6-af34-49a7d165d145 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
prompt
0Tell me something about Gemini 2.0.
\n", + "

1 rows × 1 columns

\n", + "
[1 rows x 1 columns in total]" + ], + "text/plain": [ + " prompt\n", + "0 Tell me something about Gemini 2.0.\n", + "\n", + "[1 rows x 1 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = bpd.DataFrame({\"prompt\": [\"Tell me something about Gemini 2.0.\"]})\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Make predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 9ba21e96-6023-491e-8e83-f2e6fa7df0e7 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job 933d45cc-4bc0-4bdf-b4b8-573da2d58be3 is DONE. 2 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 3dda9bc6-84b1-4f4a-8891-85d25d8848ce is DONE. 4.3 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultml_generate_text_rai_resultml_generate_text_statusprompt
0Alright, let's talk about Gemini 2.0! It's a b...<NA>Tell me something about Gemini 2.0.
\n", + "

1 rows × 4 columns

\n", + "
[1 rows x 4 columns in total]" + ], + "text/plain": [ + " ml_generate_text_llm_result \\\n", + "0 Alright, let's talk about Gemini 2.0! It's a b... \n", + "\n", + " ml_generate_text_rai_result ml_generate_text_status \\\n", + "0 \n", + "\n", + " prompt \n", + "0 Tell me something about Gemini 2.0. \n", + "\n", + "[1 rows x 4 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result = model.predict(df)\n", + "result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Copy job 8e68af62-e7ab-475b-99c9-b79e8ba3c40b is DONE. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/ml/llm.py:803: PreviewWarning: Model gemini-2.0-flash-exp is subject to the \"Pre-GA Offerings Terms\" in the General Service Terms section of the\n", + " Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available \"as is\"\n", + " and might have limited support. For more information, see the launch stage descriptions\n", + " (https://cloud.google.com/products#product-launch-stages).\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job cae7f929-d8cb-4819-a644-ac832cdc0912 is DONE. 0 Bytes processed. 
Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "GeminiTextGenerator(connection_name='bigframes-dev.us.bigframes-rf-connection',\n", + " model_name='gemini-2.0-flash-exp',\n", + " session=)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.to_gbq(\"bigframes-dev.garrettwu.gemini_2_flash\", replace=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/noxfile.py b/noxfile.py index 341de704e5..cbc9d77558 100644 --- a/noxfile.py +++ b/noxfile.py @@ -749,6 +749,7 @@ def notebook(session: nox.Session): # bq_dataframes_llm_code_generation creates a bucket in the sample. "notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb", # Needs BUCKET_URI. "notebooks/generative_ai/sentiment_analysis.ipynb", # Too slow + "notebooks/generative_ai/bq_dataframes_llm_gemini_2.ipynb", # Gemini 2.0 backend isn't ready in prod yet. # TODO(b/366290533): to protect BQML quota "notebooks/generative_ai/bq_dataframes_llm_claude3_museum_art.ipynb", "notebooks/vertex_sdk/sdk2_bigframes_pytorch.ipynb", # Needs BUCKET_URI.