Skip to content

Commit

Permalink
docs: add sample to run DML query (#591)
Browse files Browse the repository at this point in the history
* docs: add sample to run DML query

* cleanup leftover datasets before test run

* fix import order
  • Loading branch information
tswast authored Apr 13, 2021
1 parent 8089bdb commit ff2ec3a
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 0 deletions.
40 changes: 40 additions & 0 deletions samples/snippets/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,35 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import random

from google.cloud import bigquery
import pytest


RESOURCE_PREFIX = "python_bigquery_samples_snippets"


def resource_prefix() -> str:
    """Build a unique resource name prefix for a test run.

    The result is ``RESOURCE_PREFIX``, the current UTC time formatted as
    ``YYYYmmdd_HHMMSS``, and a random hexadecimal suffix, joined with
    underscores.
    """
    now = datetime.datetime.utcnow()
    # Random suffix keeps names distinct when two runs start in the same second.
    suffix = format(random.randrange(1000000), "x")
    return "_".join((RESOURCE_PREFIX, now.strftime("%Y%m%d_%H%M%S"), suffix))


@pytest.fixture(scope="session", autouse=True)
def cleanup_datasets(bigquery_client: bigquery.Client):
    """Delete sample datasets left behind by test runs more than a day old.

    Runs automatically once per test session. A dataset is treated as stale
    when its ID starts with ``RESOURCE_PREFIX`` and the ``YYYYmmdd_HHMMSS``
    timestamp that follows the prefix lies more than one day in the past.
    Stale datasets are deleted together with their contents.
    """
    # Items returned by list_datasets() do not expose a creation time, so the
    # age is recovered from the UTC timestamp that resource_prefix() embeds in
    # the dataset ID.
    timestamp_format = "%Y%m%d_%H%M%S"
    timestamp_start = len(RESOURCE_PREFIX) + 1  # Skip the "_" separator.
    timestamp_end = timestamp_start + len("YYYYmmdd_HHMMSS")
    utc = datetime.timezone.utc
    cutoff = datetime.datetime.now(utc) - datetime.timedelta(days=1)

    for dataset in bigquery_client.list_datasets():
        dataset_id = dataset.dataset_id
        if not dataset_id.startswith(RESOURCE_PREFIX):
            continue
        try:
            created = datetime.datetime.strptime(
                dataset_id[timestamp_start:timestamp_end], timestamp_format
            ).replace(tzinfo=utc)
        except ValueError:
            # The ID does not follow the resource_prefix() layout, so its age
            # is unknown; leave the dataset alone rather than guess.
            continue
        if created < cutoff:
            bigquery_client.delete_dataset(
                dataset, delete_contents=True, not_found_ok=True
            )


@pytest.fixture(scope="session")
def bigquery_client():
bigquery_client = bigquery.Client()
Expand All @@ -25,3 +50,18 @@ def bigquery_client():
@pytest.fixture(scope="session")
def project_id(bigquery_client):
    """Provide the project ID reported by the session's BigQuery client."""
    project = bigquery_client.project
    return project


@pytest.fixture(scope="session")
def dataset_id(bigquery_client: bigquery.Client, project_id: str):
    """Create a uniquely named dataset for the session and drop it afterwards.

    Yields:
        The dataset ID, without the project qualifier.
    """
    name = resource_prefix()
    temp_dataset = bigquery.Dataset(f"{project_id}.{name}")
    bigquery_client.create_dataset(temp_dataset)
    yield name
    # Teardown: remove the dataset along with any tables still inside it.
    bigquery_client.delete_dataset(
        temp_dataset, delete_contents=True, not_found_ok=True
    )


@pytest.fixture
def bigquery_client_patch(monkeypatch, bigquery_client):
    """Make a no-argument ``bigquery.Client()`` call return the shared client."""

    def shared_client():
        return bigquery_client

    monkeypatch.setattr(bigquery, "Client", shared_client)
36 changes: 36 additions & 0 deletions samples/snippets/test_update_with_dml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from google.cloud import bigquery
import pytest

from conftest import resource_prefix
import update_with_dml


@pytest.fixture
def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
    """Yield a unique table name and delete that table once the test ends."""
    name = resource_prefix() + "_update_with_dml"
    yield name
    # Teardown: the table may never have been created, hence not_found_ok.
    bigquery_client.delete_table(
        ".".join((project_id, dataset_id, name)), not_found_ok=True
    )


def test_update_with_dml(bigquery_client_patch, dataset_id, table_id):
    """Run the DML sample against temporary resources and expect modified rows."""
    overrides = dict(dataset_id=dataset_id, table_id=table_id)
    affected_rows = update_with_dml.run_sample(override_values=overrides)
    assert affected_rows > 0
82 changes: 82 additions & 0 deletions samples/snippets/update_with_dml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START bigquery_update_with_dml]
import pathlib

from google.cloud import bigquery
from google.cloud.bigquery import enums


def load_from_newline_delimited_json(
    client: bigquery.Client,
    filepath: pathlib.Path,
    project_id: str,
    dataset_id: str,
    table_id: str,
):
    """Load a newline-delimited JSON file into a BigQuery table.

    The destination is ``project_id.dataset_id.table_id`` and the load uses a
    fixed five-column schema (id, user_id, login_time, logout_time,
    ip_address). The function returns only after the load job has finished.
    """
    columns = (
        ("id", enums.SqlTypeNames.STRING),
        ("user_id", enums.SqlTypeNames.INTEGER),
        ("login_time", enums.SqlTypeNames.TIMESTAMP),
        ("logout_time", enums.SqlTypeNames.TIMESTAMP),
        ("ip_address", enums.SqlTypeNames.STRING),
    )
    job_config = bigquery.LoadJobConfig(
        source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON,
        schema=[bigquery.SchemaField(name, sql_type) for name, sql_type in columns],
    )
    destination = ".".join((project_id, dataset_id, table_id))

    with open(filepath, "rb") as source_file:
        load_job = client.load_table_from_file(
            source_file, destination, job_config=job_config
        )

    # Block until the load job completes.
    load_job.result()


def update_with_dml(
    client: bigquery.Client, project_id: str, dataset_id: str, table_id: str
):
    """Rewrite every row's ``ip_address`` with a DML ``UPDATE`` statement.

    The trailing ``.<digits>`` group of each address is replaced by ``.0``.

    Returns:
        The job's ``num_dml_affected_rows`` value, which is also printed.
    """
    target_table = f"{project_id}.{dataset_id}.{table_id}"
    sql = f"""
UPDATE `{target_table}`
SET ip_address = REGEXP_REPLACE(ip_address, r"(\\.[0-9]+)$", ".0")
WHERE TRUE
"""
    job = client.query(sql)

    # Block until the UPDATE has finished before reading its row count.
    job.result()

    affected_rows = job.num_dml_affected_rows
    print(f"DML query modified {affected_rows} rows.")
    return affected_rows


def run_sample(override_values=None):
    """Load the sample session data, then rewrite it with a DML query.

    Args:
        override_values: Optional mapping that may supply ``"dataset_id"``
            and/or ``"table_id"`` to use instead of the sample defaults.

    Returns:
        The number of rows modified, as returned by ``update_with_dml``.
    """
    client = bigquery.Client()
    filepath = pathlib.Path(__file__).parent / "user_sessions_data.json"
    project_id = client.project
    dataset_id = "sample_db"
    table_id = "UserSessions"
    # [END bigquery_update_with_dml]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    if override_values is None:
        # A None default avoids sharing one mutable dict across calls.
        override_values = {}
    dataset_id = override_values.get("dataset_id", dataset_id)
    table_id = override_values.get("table_id", table_id)
    # [START bigquery_update_with_dml]
    load_from_newline_delimited_json(client, filepath, project_id, dataset_id, table_id)
    return update_with_dml(client, project_id, dataset_id, table_id)


# [END bigquery_update_with_dml]
10 changes: 10 additions & 0 deletions samples/snippets/user_sessions_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{"id":"2ad525d6-c832-4c3d-b7fe-59d104885519","user_id":"38","login_time":"1.47766087E9","logout_time":"1.477661109E9","ip_address":"192.0.2.12"}
{"id":"53d65e20-6ea9-4650-98d9-a2111fbd1122","user_id":"88","login_time":"1.47707544E9","logout_time":"1.477075519E9","ip_address":"192.0.2.88"}
{"id":"5e6c3021-d5e7-4ccd-84b2-adfa9176d13d","user_id":"39","login_time":"1.474022869E9","logout_time":"1.474022961E9","ip_address":"203.0.113.52"}
{"id":"6196eefa-1498-4567-8ef0-498845b888d9","user_id":"52","login_time":"1.478604612E9","logout_time":"1.478604691E9","ip_address":"203.0.113.169"}
{"id":"70656dc5-7e0f-49cf-9e00-f06ed93c1f5b","user_id":"46","login_time":"1.474089924E9","logout_time":"1.474090227E9","ip_address":"192.0.2.10"}
{"id":"aafa5eef-ad49-49a7-9a0f-fbc7fd639bd3","user_id":"40","login_time":"1.478031161E9","logout_time":"1.478031388E9","ip_address":"203.0.113.18"}
{"id":"d2792fc2-24dd-4260-9456-3fbe6cdfdd90","user_id":"5","login_time":"1.481259081E9","logout_time":"1.481259247E9","ip_address":"192.0.2.140"}
{"id":"d835dc49-32f9-4790-b4eb-dddee62e0dcc","user_id":"62","login_time":"1.478892977E9","logout_time":"1.478893219E9","ip_address":"203.0.113.83"}
{"id":"f4a0d3c7-351f-471c-8e11-e093e7a6ce75","user_id":"89","login_time":"1.459031555E9","logout_time":"1.459031831E9","ip_address":"203.0.113.233"}
{"id":"f6e9f526-5b22-4679-9c3e-56a636e815bb","user_id":"97","login_time":"1.482426034E9","logout_time":"1.482426415E9","ip_address":"203.0.113.167"}

0 comments on commit ff2ec3a

Please sign in to comment.