Skip to content

Commit

Permalink
docs: show best practice of closing the session to cleanup resources …
Browse files Browse the repository at this point in the history
…in sample notebooks (#1095)

BEGIN_COMMIT_OVERRIDE
docs: show best practice of closing the session to cleanup resources in sample notebooks (#1095)

test: include stale cloud functions cleanup in the default nox run (#1095)
END_COMMIT_OVERRIDE

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
  • Loading branch information
shobsi authored Oct 18, 2024
1 parent 863d694 commit 62a88e8
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 23 deletions.
2 changes: 1 addition & 1 deletion bigframes/core/global_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
_global_session_state.thread_local_session = None


def _try_close_session(session):
def _try_close_session(session: bigframes.session.Session):
"""Try to close the session and warn if couldn't."""
try:
session.close()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1217,6 +1217,15 @@
"Otherwise, you can uncomment the remaining cells and run them to delete the individual resources you created in this tutorial:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"bf.close_session()"
]
},
{
"cell_type": "code",
"execution_count": 24,
Expand Down
10 changes: 10 additions & 0 deletions notebooks/getting_started/getting_started_bq_dataframes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1704,6 +1704,16 @@
"Otherwise, you can uncomment the remaining cells and run them to delete the individual resources you created in this tutorial:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Delete the temporary cloud artifacts created during the bigframes session \n",
"bpd.close_session()"
]
},
{
"cell_type": "code",
"execution_count": 25,
Expand Down
27 changes: 22 additions & 5 deletions notebooks/location/regionalized.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
"source": [
"# README\n",
"\n",
"This Notebook runs differently depending on the following environent variable:\n",
"1. BIGQUERY_LOCATION - can take values as per https://cloud.google.com/bigquery/docs/locations, e.g. `us`, `asia-east1`"
"This Notebook requires the following environment variables:\n",
"1. GOOGLE_CLOUD_PROJECT - The google cloud project id.\n",
"1. BIGQUERY_LOCATION - can take values as per https://cloud.google.com/bigquery/docs/locations, e.g. `us`, `asia-east1`."
]
},
{
Expand Down Expand Up @@ -1420,8 +1421,8 @@
}
],
"source": [
"import bigframes.pandas as pd\n",
"help(pd.remote_function)"
"import bigframes.pandas as bpd\n",
"help(bpd.remote_function)"
]
},
{
Expand Down Expand Up @@ -1460,7 +1461,7 @@
}
],
"source": [
"@pd.remote_function([float], str, bigquery_connection='bigframes-rf-conn')\n",
"@bpd.remote_function([float], str, bigquery_connection='bigframes-rf-conn')\n",
"def get_bucket(num):\n",
" if not num: return \"NA\"\n",
" boundary = 4000\n",
Expand Down Expand Up @@ -2784,6 +2785,22 @@
"source": [
"model.to_gbq(f\"{DATASET}.penguins_model\", replace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Clean Up"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"bpd.close_session()"
]
}
],
"metadata": {
Expand Down
18 changes: 17 additions & 1 deletion notebooks/remote_functions/remote_function_usecases.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
"# Set Up"
]
},
{
Expand Down Expand Up @@ -1379,6 +1379,22 @@
"df1 = df.assign(duration_cat=df[\"duration_minutes\"].apply(duration_category))\n",
"df1.peek()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Clean Up"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"bpd.close_session()"
]
}
],
"metadata": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -452,12 +452,21 @@
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Clean Up"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"bpd.close_session()"
]
}
],
"metadata": {
Expand Down
26 changes: 13 additions & 13 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
"system-3.9",
"system-3.12",
"cover",
"cleanup",
]

# Error if a python version is missing
Expand Down Expand Up @@ -952,25 +953,24 @@ def release_dry_run(session):
def cleanup(session):
"""Clean up stale and/or temporary resources in the test project."""
google_cloud_project = os.getenv("GOOGLE_CLOUD_PROJECT")
if not google_cloud_project:
session.error(
"Set GOOGLE_CLOUD_PROJECT environment variable to run notebook session."
)
cleanup_options = []
if google_cloud_project:
cleanup_options.append(f"--project-id={google_cloud_project}")

# Cleanup a few stale (more than 12 hours old) temporary cloud run
# functions created by bigframes. This will help keep the test GCP
# project within the "Number of functions" quota
# https://cloud.google.com/functions/quotas#resource_limits
recency_cutoff_hours = 12
cleanup_count_per_location = 10
cleanup_count_per_location = 20
cleanup_options.extend(
[
f"--recency-cutoff={recency_cutoff_hours}",
"cleanup",
f"--number={cleanup_count_per_location}",
]
)

session.install("-e", ".")

session.run(
"python",
"scripts/manage_cloud_functions.py",
f"--project-id={google_cloud_project}",
f"--recency-cutoff={recency_cutoff_hours}",
"cleanup",
f"--number={cleanup_count_per_location}",
)
session.run("python", "scripts/manage_cloud_functions.py", *cleanup_options)
17 changes: 15 additions & 2 deletions scripts/manage_cloud_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,12 @@ def list_str(values):
return [val for val in values.split(",") if val]


def get_project_from_environment():
    """Return the project id resolved by a default BigQuery client.

    A ``bigquery.Client`` is constructed with no arguments and its
    ``project`` attribute is returned, i.e. whatever project the client
    resolves from the user environment.
    """
    # NOTE(review): the import is function-local, presumably so the
    # dependency is only touched when no --project-id is given -- confirm.
    from google.cloud import bigquery

    default_client = bigquery.Client()
    return default_client.project


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Manage cloud functions created to serve bigframes remote functions."
Expand All @@ -161,9 +167,10 @@ def list_str(values):
"-p",
"--project-id",
type=str,
required=True,
required=False,
action="store",
help="GCP project-id.",
help="GCP project-id. If not provided, the project-id resolved by the"
" BigQuery client from the user environment would be used.",
)
parser.add_argument(
"-r",
Expand Down Expand Up @@ -212,4 +219,10 @@ def hours_to_timedelta(hrs):
parser_cleanup.set_defaults(func=cleanup_gcfs)

args = parser.parse_args(sys.argv[1:])
if args.project_id is None:
args.project_id = get_project_from_environment()
if args.project_id is None:
raise ValueError(
"Could not resolve a project. Please set it via --project-id option."
)
args.func(args)

0 comments on commit 62a88e8

Please sign in to comment.