From e9c59cefbc162b7bec10bfc2c5e1aba6f38bd33c Mon Sep 17 00:00:00 2001
From: Naveen Kumar M
Date: Wed, 17 Jul 2024 12:47:07 +0530
Subject: [PATCH] export results

---
 .../evaluation_tool__autoeval__colab.ipynb | 231 ++++++++++++++++--
 1 file changed, 214 insertions(+), 17 deletions(-)

diff --git a/examples/vertex_ai_conversation/evaluation_tool__autoeval__colab.ipynb b/examples/vertex_ai_conversation/evaluation_tool__autoeval__colab.ipynb
index 97a8f30b..e0e1abb0 100644
--- a/examples/vertex_ai_conversation/evaluation_tool__autoeval__colab.ipynb
+++ b/examples/vertex_ai_conversation/evaluation_tool__autoeval__colab.ipynb
@@ -25,10 +25,12 @@
    "execution_count": null,
    "metadata": {
     "collapsed": true,
-    "id": "0U8xQwhKrOUq"
+    "id": "0U8xQwhKrOUq",
+    "cellView": "form"
    },
    "outputs": [],
    "source": [
+    "# @markdown `install packages`\n",
     "!pip install dfcx-scrapi --quiet\n",
     "!pip install rouge-score --quiet\n",
     "\n",
@@ -62,6 +64,7 @@
     "import time\n",
     "import threading\n",
     "import re\n",
+    "import os\n",
     "\n",
     "from typing import Any, TypedDict\n",
     "\n",
@@ -85,6 +88,9 @@
     "from dfcx_scrapi.core import sessions\n",
     "from dfcx_scrapi.core.sessions import Sessions\n",
     "from dfcx_scrapi.tools import dataframe_functions\n",
+    "from google.cloud import bigquery\n",
+    "\n",
+    "from google.cloud.bigquery import SchemaField\n",
     "\n",
     "from googleapiclient.discovery import build\n",
     "from googleapiclient.http import MediaInMemoryUpload, MediaIoBaseDownload\n",
@@ -96,6 +102,7 @@
     "from google.cloud.dialogflowcx_v3beta1 import types\n",
     "from google.colab import auth\n",
     "from google.protobuf.json_format import MessageToDict\n",
+    "from google.colab import files\n",
     "\n",
     "from rouge_score import rouge_scorer\n",
     "\n",
@@ -1919,6 +1926,91 @@
     "    )\n",
     "    return folder_url\n",
     "\n",
+    "  def export_to_csv(self, file_name: str):\n",
+    "    queryset = self.scrape_outputs.drop(RESPONSE, axis=1)\n",
+    "    responses = self.scrape_outputs[RESPONSE].apply(lambda x: x.to_row())\n",
+    "    responses = pd.DataFrame(responses.to_list(), index=queryset.index)\n",
+    "\n",
+    "    for column in [_ANSWER_GENERATOR_LLM_PROMPT, _SEARCH_RESULTS]:\n",
+    "      truncate(responses, column)\n",
+    "\n",
+    "    results = pd.concat([queryset, responses, self.metric_outputs], axis=1)\n",
+    "    temp_dir = \"/tmp/evaluation_results\"\n",
+    "    os.makedirs(temp_dir, exist_ok=True)\n",
+    "    filepath = os.path.join(temp_dir, file_name)\n",
+    "    results.to_csv(filepath, index=False)\n",
+    "\n",
+    "    return filepath\n",
+    "\n",
+    "  def display_on_screen(self):\n",
+    "    queryset = self.scrape_outputs.drop(RESPONSE, axis=1)\n",
+    "    responses = self.scrape_outputs[RESPONSE].apply(lambda x: x.to_row())\n",
+    "    responses = pd.DataFrame(responses.to_list(), index=queryset.index)\n",
+    "\n",
+    "    for column in [_ANSWER_GENERATOR_LLM_PROMPT, _SEARCH_RESULTS]:\n",
+    "      truncate(responses, column)\n",
+    "\n",
+    "    results = pd.concat([queryset, responses, self.metric_outputs], axis=1)\n",
+    "\n",
+    "    return results\n",
+    "\n",
+    "  @staticmethod\n",
+    "  def get_bigquery_types(df):\n",
+    "    \"\"\"Maps DataFrame data types to BigQuery data types using a dictionary.\"\"\"\n",
+    "    types = []\n",
+    "    data_type_mapping = {\n",
+    "        'object': 'STRING',\n",
+    "        'int64': 'INTEGER',\n",
+    "        'float64': 'FLOAT',\n",
+    "        'bool': 'BOOLEAN',\n",
+    "        'datetime64[ns]': 'TIMESTAMP'  # Assuming nanosecond timestamps\n",
+    "    }\n",
+    "    for dtype in df.dtypes:\n",
+    "      dtype_name = str(dtype)\n",
+    "      if dtype_name in data_type_mapping:\n",
+    "        types.append(data_type_mapping[dtype_name])\n",
+    "      else:\n",
+    "        # Fall back to STRING for unhandled data types.\n",
+    "        types.append('STRING')\n",
+    "        print(f\"Warning: Unhandled data type: {dtype}\")\n",
+    "    return types\n",
+    "\n",
+    "  @staticmethod\n",
+    "  def sanitize_column_names(df):\n",
+    "    \"\"\"Sanitizes column names by replacing special characters with underscores.\"\"\"\n",
+    "    sanitized_names = []\n",
+    "    for col in df.columns:\n",
+    "      # Replace special characters with underscores using a regular expression\n",
+    "      sanitized_name = re.sub(r\"[^\\w\\s]\", \"_\", col)\n",
+    "      sanitized_names.append(sanitized_name)\n",
+    "    return df.rename(columns=dict(zip(df.columns, sanitized_names)))\n",
+    "\n",
+    "  def export_to_bigquery(self, project_id, dataset_id, table_name: str, credentials):\n",
+    "    \"\"\"Exports the evaluation results to a BigQuery table.\"\"\"\n",
+    "    queryset = self.scrape_outputs.drop(RESPONSE, axis=1)\n",
+    "    responses = self.scrape_outputs[RESPONSE].apply(lambda x: x.to_row())\n",
+    "    responses = pd.DataFrame(responses.to_list(), index=queryset.index)\n",
+    "    df = pd.concat([queryset, responses, self.metric_outputs], axis=1)\n",
+    "    df = EvaluationResult.sanitize_column_names(df)\n",
+    "    # Create a BigQuery client\n",
+    "    client = bigquery.Client(project=project_id, credentials=credentials)\n",
+    "\n",
+    "    try:\n",
+    "      # Cast columns that may hold mixed types to strings so they load cleanly.\n",
+    "      df['conversation_id'] = df['conversation_id'].astype(str)\n",
+    "      df['latency'] = df['latency'].astype(str)\n",
+    "      df['expected_uri'] = df['expected_uri'].astype(str)\n",
+    "\n",
+    "      # Drop columns that do not map cleanly to a BigQuery schema.\n",
+    "      df = df.drop(['query_result', 'golden_snippet', 'answerable'], axis=1, errors='ignore')\n",
+    "\n",
+    "      table_id = '.'.join([project_id, dataset_id, table_name])\n",
+    "      load_job = client.load_table_from_dataframe(df, table_id)\n",
+    "      return load_job.result()\n",
+    "    except Exception as e:\n",
+    "      print(f\"Error exporting data: {e}\")\n",
+    "      return None  # Indicate failure\n",
+    "\n",
     "  @property\n",
     "  def timestamp(self) -> str:\n",
     "    return self.metric_outputs[\"evaluation_timestamp\"].iloc[0]\n",
@@ -2192,10 +2284,13 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "id": "qwhOvOSOmnJ4"
+    "id": "qwhOvOSOmnJ4",
+    "cellView": "form"
    },
    "outputs": [],
    "source": [
+    "# @markdown `run this cell to load data manually`\n",
+    "\n",
     "sample_df = pd.DataFrame(columns=INPUT_SCHEMA_REQUIRED_COLUMNS)\n",
     "\n",
     "sample_df.loc[0] = [\"0\", 1 ,\"Who are you?\", \"I am an assistant\", \"www.google.com\"]\n",
@@ -2389,23 +2484,106 @@
    "execution_count": null,
    "metadata": {
     "collapsed": true,
-    "id": "9NeMsvykHb0E"
+    "id": "9NeMsvykHb0E",
+    "cellView": "form"
    },
    "outputs": [],
    "source": [
+    "# @markdown `run this cell to compute evaluation results`\n",
     "evaluation_result = evaluator.run(scrape_result)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Export results"
+   ],
+   "metadata": {
+    "id": "HAZt4TG3Pnwe"
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Option 1. - Display"
+   ],
+   "metadata": {
+    "id": "dnjWYe58P25A"
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "mLNaJ8S-RC4O",
+    "cellView": "form"
+   },
+   "outputs": [],
+   "source": [
+    "# @markdown `run this cell to display evaluation results`\n",
+    "NUMBER_OF_ROWS = 3 # @param {type: \"integer\"}\n",
+    "\n",
+    "\n",
+    "results = evaluation_result.display_on_screen()\n",
+    "results.head(NUMBER_OF_ROWS)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Option 2. - To a local CSV file and download to your system"
+   ],
+   "metadata": {
+    "id": "OfxfaXQdQF-p"
+   }
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "# @markdown `run this cell to export evaluation results to a local CSV file`\n",
+    "\n",
+    "FILE_NAME = \"evaluation_results.csv\" # @param {type: \"string\"}\n",
+    "\n",
+    "filepath = evaluation_result.export_to_csv(FILE_NAME)\n",
+    "\n",
+    "# Prompt the user to download the file\n",
+    "print(f\"CSV file created at: {filepath}\")\n",
+    "print(\"Would you like to download the file? (y/n)\")\n",
+    "user_choice = input().lower()\n",
+    "\n",
+    "if user_choice == \"y\":\n",
+    "  # Download the file using Colab's download feature\n",
+    "  files.download(filepath)\n",
+    "  print(\"File downloaded successfully!\")\n",
+    "else:\n",
+    "  print(\"Download skipped.\")"
+   ],
+   "metadata": {
+    "id": "Z4yFlm97rIRp",
+    "cellView": "form"
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Option 3. - To Google Sheets"
+   ],
+   "metadata": {
+    "id": "NsZmpIBpQIu9"
+   }
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "cellView": "form",
-    "id": "F-aAhhD-qPJp"
+    "id": "YzjPmsPVUaJt"
    },
    "outputs": [],
    "source": [
-    "# @markdown `export evaluation results`\n",
+    "# @markdown `run this cell to export evaluation results into Google Sheets`\n",
     "\n",
     "FOLDER_NAME = \"result\" # @param {type: \"string\"}\n",
     "CHUNK_SIZE = 50 # @param {type: \"number\"}\n",
@@ -2421,6 +2599,33 @@
     "print(f\"Exported results to folder: {folder_url}\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Option 4. - To BigQuery\n",
+    "\n"
+   ],
+   "metadata": {
+    "id": "z8WDndEnpj82"
+   }
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "# @markdown `run this cell to export evaluation results to BigQuery`\n",
+    "BQ_PROJECT_ID = \"\" # @param {type: \"string\"}\n",
+    "BQ_DATASET_ID = \"\" # @param {type: \"string\"}\n",
+    "BQ_TABLE_NAME = \"\" # @param {type: \"string\"}\n",
+    "\n",
+    "load_result = evaluation_result.export_to_bigquery(BQ_PROJECT_ID, BQ_DATASET_ID, BQ_TABLE_NAME, credentials)"
+   ],
+   "metadata": {
+    "id": "CwUj_bosp3Mv",
+    "cellView": "form"
+   },
+   "execution_count": null,
+   "outputs": []
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -2434,10 +2639,12 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "id": "kARLoOJYBJ0e"
+    "id": "kARLoOJYBJ0e",
+    "cellView": "form"
    },
    "outputs": [],
    "source": [
+    "# @markdown `folder URLs of evaluations to compare`\n",
     "FOLDER_URLS = [\n",
     "    folder_url, # latest evaluation\n",
     "    # add previous evaluations e.g: https://drive.google.com/drive/folders/\n",
@@ -2508,17 +2715,7 @@
  "metadata": {
   "colab": {
    "private_outputs": true,
-   "provenance": [
-    {
-     "file_id": "17WDmf3DsZGg1ZGwnr40sMXMQtyfxb4ms",
-     "timestamp": 1713942777165
-    },
-    {
-     "file_id": "1b769OFNM8gH56ZzvWUfw4MpGUc-pSF-g",
-     "timestamp": 1708939513950
-    }
-   ],
-   "toc_visible": true
+   "provenance": []
   },
  "kernelspec": {
   "display_name": "Python 3",