Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FR-186 - Export results into multiple mode #209

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
231 changes: 214 additions & 17 deletions examples/vertex_ai_conversation/evaluation_tool__autoeval__colab.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@
"execution_count": null,
"metadata": {
"collapsed": true,
"id": "0U8xQwhKrOUq"
"id": "0U8xQwhKrOUq",
"cellView": "form"
},
"outputs": [],
"source": [
"# @markdown `install packages`\n",
"!pip install dfcx-scrapi --quiet\n",
"!pip install rouge-score --quiet\n",
"\n",
Expand Down Expand Up @@ -62,6 +64,7 @@
"import time\n",
"import threading\n",
"import re\n",
"import os\n",
"\n",
"from typing import Any, TypedDict\n",
"\n",
Expand All @@ -85,6 +88,9 @@
"from dfcx_scrapi.core import sessions\n",
"from dfcx_scrapi.core.sessions import Sessions\n",
"from dfcx_scrapi.tools import dataframe_functions\n",
"from google.cloud import bigquery\n",
"\n",
"from google.cloud.bigquery import SchemaField\n",
"\n",
"from googleapiclient.discovery import build\n",
"from googleapiclient.http import MediaInMemoryUpload, MediaIoBaseDownload\n",
Expand All @@ -96,6 +102,7 @@
"from google.cloud.dialogflowcx_v3beta1 import types\n",
"from google.colab import auth\n",
"from google.protobuf.json_format import MessageToDict\n",
"from google.colab import files\n",
"\n",
"from rouge_score import rouge_scorer\n",
"\n",
Expand Down Expand Up @@ -1937,6 +1944,91 @@
" )\n",
" return folder_url\n",
"\n",
" def export_to_csv(self, file_name: str):\n",
" queryset = self.scrape_outputs.drop(RESPONSE, axis=1)\n",
" responses = self.scrape_outputs[RESPONSE].apply(lambda x: x.to_row())\n",
" responses = pd.DataFrame(responses.to_list(), index=queryset.index)\n",
"\n",
" for column in [_ANSWER_GENERATOR_LLM_PROMPT, _SEARCH_RESULTS]:\n",
" truncate(responses, column)\n",
"\n",
" results = pd.concat([queryset, responses, self.metric_outputs], axis=1)\n",
" temp_dir = \"/tmp/evaluation_results\"\n",
" os.makedirs(temp_dir, exist_ok=True)\n",
" filepath = os.path.join(temp_dir, file_name)\n",
" results.to_csv(filepath, index=False)\n",
"\n",
" return filepath\n",
"\n",
" def display_on_screen(self):\n",
" queryset = self.scrape_outputs.drop(RESPONSE, axis=1)\n",
" responses = self.scrape_outputs[RESPONSE].apply(lambda x: x.to_row())\n",
" responses = pd.DataFrame(responses.to_list(), index=queryset.index)\n",
"\n",
" for column in [_ANSWER_GENERATOR_LLM_PROMPT, _SEARCH_RESULTS]:\n",
" truncate(responses, column)\n",
"\n",
" results = pd.concat([queryset, responses, self.metric_outputs], axis=1)\n",
"\n",
" return results\n",
"\n",
" def get_bigquery_types(df):\n",
" \"\"\"Maps DataFrame data types to BigQuery data types using a dictionary.\"\"\"\n",
" types = []\n",
" data_type_mapping = {\n",
" 'object': 'STRING',\n",
" 'int64': 'INTEGER',\n",
" 'float64': 'FLOAT',\n",
" 'bool': 'BOOLEAN',\n",
" 'datetime64[ns]': 'TIMESTAMP' # Assuming nanosecond timestamps\n",
" }\n",
" for dtype in df.dtypes:\n",
" if dtype in data_type_mapping:\n",
" types.append(data_type_mapping[dtype])\n",
" else:\n",
" # Handle other data types (error handling or placeholder)\n",
" types.append('STRING') # Placeholder, adjust as needed\n",
" print(f\"Warning: Unhandled data type: {dtype}\")\n",
" return types\n",
"\n",
"\n",
" def sanitize_column_names(df):\n",
" \"\"\"Sanitizes column names in a DataFrame by replacing special characters with underscores.\n",
" \"\"\"\n",
" sanitized_names = []\n",
" for col in df.columns:\n",
" # Replace special characters with underscores using a regular expression\n",
" sanitized_name = re.sub(r\"[^\\w\\s]\", \"_\", col)\n",
" sanitized_names.append(sanitized_name)\n",
" return df.rename(columns=dict(zip(df.columns, sanitized_names)))\n",
"\n",
" def export_to_bigquery(self,project_id,dataset_id,table_name:str, credentials):\n",
" data=evaluation_result.scrape_outputs[RESPONSE].apply(lambda x: x.to_row())\n",
" data = pd.DataFrame(data.to_list(),evaluation_result.scrape_outputs.index)\n",
" evaluation_result.scrape_outputs[RESPONSE] = None\n",
" df = pd.concat([data,evaluation_result.scrape_outputs, evaluation_result.metric_outputs], axis=1)\n",
" df=EvaluationResult.sanitize_column_names(df)\n",
" # Create a BigQuery client\n",
" client = bigquery.Client(project=project_id, credentials=credentials)\n",
"\n",
" try:\n",
" df['conversation_id'] = df['conversation_id'].astype(str)\n",
" df['latency'] = df['latency'].astype(str)\n",
" df['expected_uri'] = df['expected_uri'].astype(str)\n",
" df['answerable'] = df['answerable'].astype(str)\n",
" df['golden_snippet'] = df['golden_snippet'].astype(str)\n",
"\n",
" df = df.drop('query_result', axis=1)\n",
" df = df.drop('golden_snippet', axis=1)\n",
" df = df.drop('answerable', axis=1)\n",
"\n",
" load_job = client.load_table_from_dataframe(df, '.'.join([project_id, dataset_id, table_name]))\n",
"\n",
" return load_job.result()\n",
" except Exception as e:\n",
" print(f\"Error exporting data: {e}\")\n",
" return None # Indicate failure\n",
"\n",
" @property\n",
" def timestamp(self) -> str:\n",
" return self.metric_outputs[\"evaluation_timestamp\"].iloc[0]\n",
Expand Down Expand Up @@ -2210,10 +2302,13 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qwhOvOSOmnJ4"
"id": "qwhOvOSOmnJ4",
"cellView": "form"
},
"outputs": [],
"source": [
"# @markdown `run this cell to load data manually`\n",
"\n",
"sample_df = pd.DataFrame(columns=INPUT_SCHEMA_REQUIRED_COLUMNS)\n",
"\n",
"sample_df.loc[0] = [\"0\", 1 ,\"Who are you?\", \"I am an assistant\", \"www.google.com\", None]\n",
Expand Down Expand Up @@ -2422,23 +2517,106 @@
"execution_count": null,
"metadata": {
"collapsed": true,
"id": "9NeMsvykHb0E"
"id": "9NeMsvykHb0E",
"cellView": "form"
},
"outputs": [],
"source": [
"# @markdown `evaluation results`\n",
"evaluation_result = evaluator.run(scrape_result)"
]
},
{
"cell_type": "markdown",
"source": [
"## Export results"
],
"metadata": {
"id": "HAZt4TG3Pnwe"
}
},
{
"cell_type": "markdown",
"source": [
"### Option 1. - Display"
],
"metadata": {
"id": "dnjWYe58P25A"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "mLNaJ8S-RC4O",
"cellView": "form"
},
"outputs": [],
"source": [
"# @markdown `run this cell to display evaluation results`\n",
"Number_of_rows = 3 # @param {type: \"integer\"}\n",
"\n",
"\n",
"results=evaluation_result.display_on_screen()\n",
"results.head(Number_of_rows)"
]
},
{
"cell_type": "markdown",
"source": [
"### Option 2. - To a local .csv file and download to your system"
],
"metadata": {
"id": "OfxfaXQdQF-p"
}
},
{
"cell_type": "code",
"source": [
"# @markdown `run this cell to export evaluation results into a local CSV file`\n",
"\n",
"FILE_NAME = \"evaluation_results.csv\" # @param {type: \"string\"}\n",
"\n",
"filepath = evaluation_result.export_to_csv(FILE_NAME)\n",
"\n",
"# Prompt user to download the file\n",
"print(f\"CSV file created at: {filepath}\")\n",
"print(\"Would you like to download the file? (y/n)\")\n",
"user_choice = input().lower()\n",
"\n",
"if user_choice == \"y\":\n",
" # Download the file using Colab's download feature\n",
" files.download(filepath)\n",
" print(\"File downloaded successfully!\")\n",
"else:\n",
" print(\"Download skipped.\")"
],
"metadata": {
"id": "Z4yFlm97rIRp",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Option 3. - To Google Sheets"
],
"metadata": {
"id": "NsZmpIBpQIu9"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "F-aAhhD-qPJp"
"id": "YzjPmsPVUaJt"
},
"outputs": [],
"source": [
"# @markdown `export evaluation results`\n",
"# @markdown `run this cell to export evaluation results into Google Sheets`\n",
"\n",
"FOLDER_NAME = \"result\" # @param {type: \"string\"}\n",
"CHUNK_SIZE = 50 # @param {type: \"number\"}\n",
Expand All @@ -2454,6 +2632,33 @@
"print(f\"Exported results to folder: {folder_url}\")"
]
},
{
"cell_type": "markdown",
"source": [
"### Option 4. - To BigQuery\n",
"\n"
],
"metadata": {
"id": "z8WDndEnpj82"
}
},
{
"cell_type": "code",
"source": [
"BQ_PROJECT_ID=\"\" # @param {type: \"string\"}\n",
"BQ_DATASET_ID=\"\" # @param {type: \"string\"}\n",
"BQ_TABLE_NAME =\"\" # @param {type: \"string\"}\n",
"\n",
"\n",
"filepath = evaluation_result.export_to_bigquery(BQ_PROJECT_ID,BQ_DATASET_ID,BQ_TABLE_NAME,credentials)"
],
"metadata": {
"id": "CwUj_bosp3Mv",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
Expand All @@ -2467,10 +2672,12 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "kARLoOJYBJ0e"
"id": "kARLoOJYBJ0e",
"cellView": "form"
},
"outputs": [],
"source": [
"# @markdown `Folder url`\n",
"FOLDER_URLS = [\n",
" folder_url, # latest evaluation\n",
" # add previous evaluations e.g: https://drive.google.com/drive/folders/<id>\n",
Expand Down Expand Up @@ -2541,17 +2748,7 @@
"metadata": {
"colab": {
"private_outputs": true,
"provenance": [
{
"file_id": "17WDmf3DsZGg1ZGwnr40sMXMQtyfxb4ms",
"timestamp": 1713942777165
},
{
"file_id": "1b769OFNM8gH56ZzvWUfw4MpGUc-pSF-g",
"timestamp": 1708939513950
}
],
"toc_visible": true
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
Expand Down
Loading