From 9ed69971bf200eb95c4c961dc6643f211d7485ce Mon Sep 17 00:00:00 2001
From: Tres Seaver
Date: Tue, 29 Mar 2016 13:48:31 -0400
Subject: [PATCH] Expand/clarify synchronous query usage docs.

Address three separate cases:

- Query finishes within timeout, with all rows present.
- Query finishes within timeout, but result set is too large for the
  initial response.
- Query times out.

Closes #1551.
---
 docs/bigquery-usage.rst | 88 +++++++++++++++++++++++++++++++++++------
 1 file changed, 75 insertions(+), 13 deletions(-)

diff --git a/docs/bigquery-usage.rst b/docs/bigquery-usage.rst
index f84bf638f3cb..edab2e1510d4 100644
--- a/docs/bigquery-usage.rst
+++ b/docs/bigquery-usage.rst
@@ -291,26 +291,88 @@ Run a query which can be expected to complete within bounded time:
 
     >>> from gcloud import bigquery
     >>> client = bigquery.Client()
-    >>> query = """\
-    SELECT count(*) AS age_count FROM dataset_name.person_ages
-    """
-    >>> query = client.run_sync_query(query)
+    >>> QUERY = """\
+    ... SELECT count(*) AS age_count FROM dataset_name.person_ages
+    ... """
+    >>> query = client.run_sync_query(QUERY)
+    >>> query.timeout_ms = 1000
+    >>> query.run()  # API request
+    >>> query.complete
+    True
+    >>> len(query.schema)
+    1
+    >>> field = query.schema[0]
+    >>> field.name
+    u'age_count'
+    >>> field.field_type
+    u'INTEGER'
+    >>> field.mode
+    u'NULLABLE'
+    >>> query.rows
+    [(15,)]
+    >>> query.total_rows
+    1
+
+If the rows returned by the query do not all fit into the initial
+response, then we need to fetch the remaining rows via ``fetch_data``:
+
+.. doctest::
+
+    >>> from gcloud import bigquery
+    >>> client = bigquery.Client()
+    >>> QUERY = """\
+    ... SELECT * FROM dataset_name.person_ages
+    ... """
+    >>> query = client.run_sync_query(QUERY)
+    >>> query.timeout_ms = 1000
+    >>> query.run()  # API request
+    >>> query.complete
+    True
+    >>> query.total_rows
+    1234
+    >>> query.page_token
+    '8d6e452459238eb0fe87d8eb191dd526ee70a35e'
+    >>> do_something_with(query.schema, query.rows)
+    >>> token = query.page_token  # for the next request
+    >>> while True:
+    ...     if token is None:
+    ...         break
+    ...     rows, _, token = query.fetch_data(page_token=token)
+    ...     do_something_with(query.schema, rows)
+
+
+If the query takes longer than the timeout allowed, ``query.complete``
+will be ``False``. In that case, we need to poll the associated job
+until it is done, and then fetch the results:
+
+.. doctest::
+
+    >>> from gcloud import bigquery
+    >>> client = bigquery.Client()
+    >>> QUERY = """\
+    ... SELECT * FROM dataset_name.person_ages
+    ... """
+    >>> query = client.run_sync_query(QUERY)
     >>> query.timeout_ms = 1000
     >>> query.run()  # API request
+    >>> query.complete
+    False
+    >>> job = query.job
     >>> retry_count = 100
-    >>> while retry_count > 0 and not job.complete:
+    >>> while retry_count > 0 and job.state == 'running':
     ...     retry_count -= 1
     ...     time.sleep(10)
-    ...     query.reload()  # API request
-    >>> query.schema
-    [{'name': 'age_count', 'type': 'integer', 'mode': 'nullable'}]
-    >>> query.rows
-    [(15,)]
+    ...     job.reload()  # API request
+    >>> job.state
+    'done'
+    >>> token = None  # for the initial request
+    >>> while True:
+    ...     rows, _, token = query.fetch_data(page_token=token)
+    ...     do_something_with(query.schema, rows)
+    ...     if token is None:
+    ...         break
 
-.. note::
-   If the query takes longer than the timeout allowed, ``job.complete``
-   will be ``False``: we therefore poll until it is completed.
 
 Querying data (asynchronous)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
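The three cases above share most of their setup. For reference, a minimal
untested sketch combining them into a single helper, using only the calls
exercised in this patch (``run_sync`` and ``process_rows`` are hypothetical
names; ``process_rows`` stands in for the ``do_something_with``
placeholder)::

    import time

    from gcloud import bigquery


    def process_rows(schema, rows):
        # Hypothetical application callback: print each row as a dict
        # keyed by the schema's field names.
        names = [field.name for field in schema]
        for row in rows:
            print(dict(zip(names, row)))


    def run_sync(query_text, timeout_ms=1000, max_retries=100):
        client = bigquery.Client()
        query = client.run_sync_query(query_text)
        query.timeout_ms = timeout_ms
        query.run()  # API request

        if query.complete:
            # Cases 1 and 2: the initial response holds the first page
            # of rows; 'page_token' is None unless more pages remain.
            process_rows(query.schema, query.rows)
            token = query.page_token
        else:
            # Case 3: the query timed out.  Poll the associated job
            # until done, then fetch rows from the first page onward.
            job = query.job
            retries = max_retries
            while retries > 0 and job.state == 'running':
                retries -= 1
                time.sleep(10)
                job.reload()  # API request
            rows, _, token = query.fetch_data(page_token=None)  # API request
            process_rows(query.schema, rows)

        # Page through any remaining rows.
        while token is not None:
            rows, _, token = query.fetch_data(page_token=token)  # API request
            process_rows(query.schema, rows)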