diff --git a/bigquery/samples/export_data_to_cloud_storage.py b/bigquery/samples/export_data_to_cloud_storage.py
index 7b361e68d403..62675d66d209 100644
--- a/bigquery/samples/export_data_to_cloud_storage.py
+++ b/bigquery/samples/export_data_to_cloud_storage.py
@@ -21,6 +21,19 @@
 def export_table(service, cloud_storage_path,
                  projectId, datasetId, tableId,
                  num_retries=5):
+    """
+    Starts an export job
+
+    Args:
+        service: initialized and authorized bigquery
+            google-api-client object,
+        cloud_storage_path: fully qualified
+            path to a Google Cloud Storage location,
+            e.g. gs://mybucket/myfolder/
+
+    Returns: an extract job resource representing the
+        job, see https://cloud.google.com/bigquery/docs/reference/v2/jobs
+    """
     # Generate a unique job_id so retries
     # don't accidentally duplicate export
     job_data = {
diff --git a/bigquery/samples/load_data_by_post.py b/bigquery/samples/load_data_by_post.py
index 6c03885c4db8..8ba4b8838270 100644
--- a/bigquery/samples/load_data_by_post.py
+++ b/bigquery/samples/load_data_by_post.py
@@ -22,6 +22,18 @@
 
 # [START make_post]
 def make_post(http, schema, data, projectId, datasetId, tableId):
+    """
+    Creates an http POST request for loading data into
+    a bigquery table
+
+    Args:
+        http: an authorized httplib2 client,
+        schema: a valid bigquery schema,
+            see https://cloud.google.com/bigquery/docs/reference/v2/tables,
+        data: valid JSON to insert into the table
+
+    Returns: an http.request object
+    """
     url = ('https://www.googleapis.com/upload/bigquery/v2/projects/' +
            projectId + '/jobs')
     # Create the body of the request, separated by a boundary of xxx
diff --git a/bigquery/samples/load_data_from_csv.py b/bigquery/samples/load_data_from_csv.py
index 73a108d3eee3..a58fc32d9885 100644
--- a/bigquery/samples/load_data_from_csv.py
+++ b/bigquery/samples/load_data_from_csv.py
@@ -20,6 +20,21 @@
 # [START load_table]
 def load_table(service, source_schema, source_csv,
                projectId, datasetId, tableId, num_retries=5):
+    """
+    Starts a job to load a bigquery table from CSV
+
+    Args:
+        service: an initialized and authorized bigquery
+            google-api-client object
+        source_schema: a valid bigquery schema,
+            see https://cloud.google.com/bigquery/docs/reference/v2/tables
+        source_csv: the fully qualified Google Cloud Storage location of
+            the data to load into your table
+
+    Returns: a bigquery load job, see
+        https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load
+    """
+
     # Generate a unique job_id so retries
     # don't accidentally duplicate query
     job_data = {
diff --git a/bigquery/samples/utils.py b/bigquery/samples/utils.py
index dd7a3900cbcd..0b375cefc034 100644
--- a/bigquery/samples/utils.py
+++ b/bigquery/samples/utils.py
@@ -15,8 +15,11 @@
 
 # [START get_service]
 def get_service():
+    """returns an initialized and authorized bigquery client"""
+
     from googleapiclient.discovery import build
     from oauth2client.client import GoogleCredentials
+
     credentials = GoogleCredentials.get_application_default()
     if credentials.create_scoped_required():
         credentials = credentials.create_scoped(
@@ -27,6 +30,8 @@ def get_service():
 
 # [START poll_job]
 def poll_job(service, projectId, jobId, interval=5, num_retries=5):
+    """checks the status of a job every *interval* seconds"""
+
     import time
 
     job_get = service.jobs().get(projectId=projectId, jobId=jobId)
@@ -44,6 +49,8 @@ def poll_job(service, projectId, jobId, interval=5, num_retries=5):
 
 # [START paging]
 def paging(service, request_func, num_retries=5, **kwargs):
+    """pages through the results of an asynchronous job"""
+
     has_next = True
     while has_next:
         response = request_func(**kwargs).execute(num_retries=num_retries)
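Taken together, the documented helpers form a small pipeline: `get_service()` builds the authorized client, a job-starting function such as `load_table(...)` or `export_table(...)` kicks off the work, and `poll_job(...)` waits for it to finish. The sketch below shows that flow under stated assumptions: the import paths, the `example-*` project/dataset/table names, the `gs://` location, and the `{'fields': [...]}` schema shape are illustrative placeholders, not values taken from this change.

```python
# Usage sketch only: module paths, "example-*" names, the gs:// URI,
# and the schema shape are assumptions, not part of this diff.
from bigquery.samples.utils import get_service, poll_job
from bigquery.samples.load_data_from_csv import load_table

service = get_service()  # authorized bigquery google-api-client object

# A one-column schema in the format described by the tables reference
# linked from the docstrings (assumed here to be the full object).
schema = {'fields': [{'name': 'name', 'type': 'STRING'}]}

# Start the load job from a CSV already sitting in Cloud Storage.
job = load_table(service, schema, 'gs://example-bucket/data.csv',
                 'example-project', 'example_dataset', 'example_table')

# The insert returns a job resource carrying its jobReference; poll
# every 5 seconds (the default interval) until the job completes.
poll_job(service, 'example-project', job['jobReference']['jobId'])
```

As the `# Generate a unique job_id` comments in the diff note, each job-starting helper mints its own job id, so the `num_retries` retries on the insert call can safely re-send the request without starting a duplicate job.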