From b56a5346c881a5764294d4ee992d503d7d8d387c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 29 Nov 2018 13:18:58 -0800 Subject: [PATCH] Add BigQuery Storage API client (currently alpha) (#6696) * Generate google-cloud-bigquery-storage library using synthtool. (#472) * Add simple system test for BigQuery Storage API (#473) * Add simple system test for BigQuery Storage API For the system tests to run correctly, an unreleased version of api_core is required. Updates the setup.py file to indicate the minimum version. * Remove mock dependency and unnecessary imports. * Use -e for local deps. * Add manual client for BigQuery Storage (#474) * Add manual client for BigQuery Storage. The manual client adds a default project, which is used by the wrapped create_read_session to make the project parameter optional. A future purpose of the manual client is to override the read_rows method to return an iterator that can reconnect with the correct offset, much like Spanner's StreamedResultSet class. This work is not yet complete. I'd like feedback on the approach for the manual client first. * Use same client name as wrapped gapic class. * Use subclass for BigQueryStorageClient Also, change create_read_session project argument back to 'parent' to better match the super class. * Add unit tests. * Remove default project from manual client. * Use project_id fixture in system tests. * Regenerate google-cloud-bigquery-storage (#475) * Regenerate google-cloud-bigquery-storage * Re-synthed * Handwritten reader to reconnect on ReadRows streams (#477) * WIP: reader to reconnect on ReadRows streams * Add method to copy a StreamPosition. * Add unit tests for hand-written reader module. * Add docs for new hand-written module. * Add fastavro to root intersphinx * Add ServiceUnavailable to list of resumable exceptions. Pull in .coveragerc file from upstream google-cloud-python. 
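A minimal usage sketch of the new client and hand-written reader, adapted from the docstring example in google/cloud/bigquery_storage_v1beta1/client.py below; the project, dataset, and table identifiers are placeholders:

    from google.cloud import bigquery_storage_v1beta1

    client = bigquery_storage_v1beta1.BigQueryStorageClient()

    # Placeholders: substitute real project, dataset, and table identifiers.
    table_reference = {
        'project_id': 'your-data-project-id',
        'dataset_id': 'your_dataset_id',
        'table_id': 'your_table_id',
    }
    parent = 'projects/your-billing-project-id'

    session = client.create_read_session(table_reference, parent)

    # Per the notes above, the subclassed ``read_rows`` wraps the GAPIC stream
    # in a ReadRowsStream that reconnects at the correct offset on resumable
    # errors (e.g. ServiceUnavailable) instead of surfacing them.
    read_position = bigquery_storage_v1beta1.types.StreamPosition(
        stream=session.streams[0],  # TODO: read the remaining streams, too.
    )
    for response in client.read_rows(read_position):
        pass  # Each element is a ReadRowsResponse message.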
--- .kokoro/continuous/bigquery_storage.cfg | 7 + .kokoro/presubmit/bigquery_storage.cfg | 7 + bigquery_storage/.coveragerc | 16 + bigquery_storage/.gitignore | 1 + bigquery_storage/LICENSE | 201 ++++ bigquery_storage/MANIFEST.in | 5 + bigquery_storage/README.rst | 75 ++ bigquery_storage/docs/conf.py | 313 ++++++ bigquery_storage/docs/gapic/v1beta1/api.rst | 6 + .../docs/gapic/v1beta1/reader.rst | 6 + bigquery_storage/docs/gapic/v1beta1/types.rst | 5 + bigquery_storage/docs/index.rst | 10 + bigquery_storage/google/__init__.py | 22 + bigquery_storage/google/cloud/__init__.py | 22 + .../google/cloud/bigquery_storage.py | 27 + .../bigquery_storage_v1beta1/__init__.py | 42 + .../cloud/bigquery_storage_v1beta1/client.py | 131 +++ .../gapic/__init__.py | 0 .../gapic/big_query_storage_client.py | 610 +++++++++++ .../gapic/big_query_storage_client_config.py | 48 + .../bigquery_storage_v1beta1/gapic/enums.py | 31 + .../gapic/transports/__init__.py | 0 .../big_query_storage_grpc_transport.py | 208 ++++ .../proto/__init__.py | 0 .../proto/avro_pb2.py | 136 +++ .../proto/avro_pb2_grpc.py | 3 + .../proto/read_options_pb2.py | 96 ++ .../proto/read_options_pb2_grpc.py | 3 + .../proto/storage_pb2.py | 950 ++++++++++++++++++ .../proto/storage_pb2_grpc.py | 165 +++ .../proto/table_reference_pb2.py | 149 +++ .../proto/table_reference_pb2_grpc.py | 3 + .../cloud/bigquery_storage_v1beta1/reader.py | 265 +++++ .../cloud/bigquery_storage_v1beta1/types.py | 53 + bigquery_storage/noxfile.py | 133 +++ bigquery_storage/setup.cfg | 10 + bigquery_storage/setup.py | 80 ++ bigquery_storage/synth.py | 113 +++ bigquery_storage/tests/system/test_system.py | 101 ++ .../test_big_query_storage_client_v1beta1.py | 244 +++++ bigquery_storage/tests/unit/test_client.py | 97 ++ bigquery_storage/tests/unit/test_reader.py | 360 +++++++ docs/conf.py | 3 +- 43 files changed, 4756 insertions(+), 1 deletion(-) create mode 100644 .kokoro/continuous/bigquery_storage.cfg create mode 100644 .kokoro/presubmit/bigquery_storage.cfg create mode 100644 bigquery_storage/.coveragerc create mode 100644 bigquery_storage/.gitignore create mode 100644 bigquery_storage/LICENSE create mode 100644 bigquery_storage/MANIFEST.in create mode 100644 bigquery_storage/README.rst create mode 100644 bigquery_storage/docs/conf.py create mode 100644 bigquery_storage/docs/gapic/v1beta1/api.rst create mode 100644 bigquery_storage/docs/gapic/v1beta1/reader.rst create mode 100644 bigquery_storage/docs/gapic/v1beta1/types.rst create mode 100644 bigquery_storage/docs/index.rst create mode 100644 bigquery_storage/google/__init__.py create mode 100644 bigquery_storage/google/cloud/__init__.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/__init__.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/client.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/__init__.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client_config.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/enums.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/__init__.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py create mode 100644 
bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/__init__.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2_grpc.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2_grpc.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2_grpc.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/reader.py create mode 100644 bigquery_storage/google/cloud/bigquery_storage_v1beta1/types.py create mode 100644 bigquery_storage/noxfile.py create mode 100644 bigquery_storage/setup.cfg create mode 100644 bigquery_storage/setup.py create mode 100644 bigquery_storage/synth.py create mode 100644 bigquery_storage/tests/system/test_system.py create mode 100644 bigquery_storage/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py create mode 100644 bigquery_storage/tests/unit/test_client.py create mode 100644 bigquery_storage/tests/unit/test_reader.py diff --git a/.kokoro/continuous/bigquery_storage.cfg b/.kokoro/continuous/bigquery_storage.cfg new file mode 100644 index 000000000000..7f72bfed0587 --- /dev/null +++ b/.kokoro/continuous/bigquery_storage.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Tell the trampoline which build file to use. +env_vars: { + key: "PACKAGE" + value: "bigquery_storage" +} diff --git a/.kokoro/presubmit/bigquery_storage.cfg b/.kokoro/presubmit/bigquery_storage.cfg new file mode 100644 index 000000000000..7f72bfed0587 --- /dev/null +++ b/.kokoro/presubmit/bigquery_storage.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Tell the trampoline which build file to use. +env_vars: { + key: "PACKAGE" + value: "bigquery_storage" +} diff --git a/bigquery_storage/.coveragerc b/bigquery_storage/.coveragerc new file mode 100644 index 000000000000..8aba24fd6a78 --- /dev/null +++ b/bigquery_storage/.coveragerc @@ -0,0 +1,16 @@ +[run] +branch = True + +[report] +fail_under = 100 +show_missing = True +exclude_lines = + # Re-enable the standard pragma + pragma: NO COVER + # Ignore debug-only repr + def __repr__ + # Ignore abstract methods + raise NotImplementedError +omit = + */gapic/*.py + */proto/*.py \ No newline at end of file diff --git a/bigquery_storage/.gitignore b/bigquery_storage/.gitignore new file mode 100644 index 000000000000..9e3a5f25770c --- /dev/null +++ b/bigquery_storage/.gitignore @@ -0,0 +1 @@ +docs/_build \ No newline at end of file diff --git a/bigquery_storage/LICENSE b/bigquery_storage/LICENSE new file mode 100644 index 000000000000..a8ee855de2aa --- /dev/null +++ b/bigquery_storage/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/bigquery_storage/MANIFEST.in b/bigquery_storage/MANIFEST.in new file mode 100644 index 000000000000..9cbf175afe6b --- /dev/null +++ b/bigquery_storage/MANIFEST.in @@ -0,0 +1,5 @@ +include README.rst LICENSE +recursive-include google *.json *.proto +recursive-include tests * +global-exclude *.py[co] +global-exclude __pycache__ diff --git a/bigquery_storage/README.rst b/bigquery_storage/README.rst new file mode 100644 index 000000000000..22c9292c1023 --- /dev/null +++ b/bigquery_storage/README.rst @@ -0,0 +1,75 @@ +Python Client for BigQuery Storage API (`Alpha`_) +================================================= + +`BigQuery Storage API`_: + +- `Client Library Documentation`_ +- `Product Documentation`_ + +.. _Alpha: https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/README.rst +.. _BigQuery Storage API: https://cloud.google.com/bigquery +.. _Client Library Documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery_storage/index.html +.. _Product Documentation: https://cloud.google.com/bigquery + +Quick Start +----------- + +In order to use this library, you first need to go through the following steps: + +1. `Select or create a Cloud Platform project.`_ +2. `Enable billing for your project.`_ +3. `Enable the BigQuery Storage API.`_ +4. `Setup Authentication.`_ + +.. _Select or create a Cloud Platform project.: https://console.cloud.google.com/project +.. _Enable billing for your project.: https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project +.. _Enable the BigQuery Storage API.: https://cloud.google.com/bigquerystorage +.. _Setup Authentication.: https://googlecloudplatform.github.io/google-cloud-python/stable/core/auth.html + +Installation +~~~~~~~~~~~~ + +Install this library in a `virtualenv`_ using pip. `virtualenv`_ is a tool to +create isolated Python environments. The basic problem it addresses is one of +dependencies and versions, and indirectly permissions. + +With `virtualenv`_, it's possible to install this library without needing system +install permissions, and without clashing with the installed system +dependencies. + +.. _`virtualenv`: https://virtualenv.pypa.io/en/latest/ + + +Mac/Linux +^^^^^^^^^ + +.. code-block:: console + + pip install virtualenv + virtualenv <your-env> + source <your-env>/bin/activate + <your-env>/bin/pip install google-cloud-bigquery-storage + + +Windows +^^^^^^^ + +.. code-block:: console + + pip install virtualenv + virtualenv <your-env> + <your-env>\Scripts\activate + <your-env>\Scripts\pip.exe install google-cloud-bigquery-storage + +Next Steps +~~~~~~~~~~ + +- Read the `Client Library Documentation`_ for BigQuery Storage API + to see other available methods on the client. +- Read the `BigQuery Storage API Product documentation`_ to learn + more about the product and see How-to Guides. +- View this `repository’s main README`_ to see the full list of Cloud + APIs that we cover. + +.. _BigQuery Storage API Product documentation: https://cloud.google.com/bigquery +.. _repository’s main README: https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/README.rst \ No newline at end of file diff --git a/bigquery_storage/docs/conf.py b/bigquery_storage/docs/conf.py new file mode 100644 index 000000000000..50efafe7cfbd --- /dev/null +++ b/bigquery_storage/docs/conf.py @@ -0,0 +1,313 @@ +# -*- coding: utf-8 -*- +# +# google-cloud-bigquerystorage documentation build configuration file +# +# This file is execfile()d with the current directory set to its +# containing dir.
+# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os +import shlex + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0, os.path.abspath('..')) + +__version__ = '0.1.0' + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.intersphinx', + 'sphinx.ext.coverage', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', +] + +# autodoc/autosummary flags +autoclass_content = 'both' +autodoc_default_flags = ['members'] +autosummary_generate = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'google-cloud-bigquerystorage' +copyright = u'2017, Google' +author = u'Google APIs' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The full version, including alpha/beta/rc tags. +release = __version__ +# The short X.Y version. +version = '.'.join(release.split('.')[0:2]) + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. 
+#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = [] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' +#html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. 
+# Now only 'ja' uses this config value +#html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +#html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'google-cloud-bigquerystorage-doc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + #'preamble': '', + + # Latex figure (float) alignment + #'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'google-cloud-bigquerystorage.tex', + u'google-cloud-bigquerystorage Documentation', author, 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [(master_doc, 'google-cloud-bigquerystorage', + u'google-cloud-bigquerystorage Documentation', [author], 1)] + +# If true, show URL addresses after external links. +#man_show_urls = False + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'google-cloud-bigquerystorage', + u'google-cloud-bigquerystorage Documentation', author, + 'google-cloud-bigquerystorage', + 'GAPIC library for the {metadata.shortName} v1beta1 service', 'APIs'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False + +# Example configuration for intersphinx: refer to the Python standard library. 
+intersphinx_mapping = { + 'python': ('http://python.readthedocs.org/en/latest/', None), + 'gax': ('https://gax-python.readthedocs.org/en/latest/', None), + 'fastavro': ('https://fastavro.readthedocs.io/en/stable/', None), + 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), +} + +# Napoleon settings +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = False +napoleon_use_admonition_for_notes = False +napoleon_use_admonition_for_references = False +napoleon_use_ivar = False +napoleon_use_param = True +napoleon_use_rtype = True diff --git a/bigquery_storage/docs/gapic/v1beta1/api.rst b/bigquery_storage/docs/gapic/v1beta1/api.rst new file mode 100644 index 000000000000..d4df98557e15 --- /dev/null +++ b/bigquery_storage/docs/gapic/v1beta1/api.rst @@ -0,0 +1,6 @@ +Client for BigQuery Storage API +=============================== + +.. automodule:: google.cloud.bigquery_storage_v1beta1 + :members: + :inherited-members: \ No newline at end of file diff --git a/bigquery_storage/docs/gapic/v1beta1/reader.rst b/bigquery_storage/docs/gapic/v1beta1/reader.rst new file mode 100644 index 000000000000..5b6af828f53e --- /dev/null +++ b/bigquery_storage/docs/gapic/v1beta1/reader.rst @@ -0,0 +1,6 @@ +Reader for BigQuery Storage API +=============================== + +.. automodule:: google.cloud.bigquery_storage_v1beta1.reader + :members: + :inherited-members: diff --git a/bigquery_storage/docs/gapic/v1beta1/types.rst b/bigquery_storage/docs/gapic/v1beta1/types.rst new file mode 100644 index 000000000000..a36210a64e52 --- /dev/null +++ b/bigquery_storage/docs/gapic/v1beta1/types.rst @@ -0,0 +1,5 @@ +Types for BigQuery Storage API Client +===================================== + +.. automodule:: google.cloud.bigquery_storage_v1beta1.types + :members: \ No newline at end of file diff --git a/bigquery_storage/docs/index.rst b/bigquery_storage/docs/index.rst new file mode 100644 index 000000000000..1232e5865ae5 --- /dev/null +++ b/bigquery_storage/docs/index.rst @@ -0,0 +1,10 @@ +.. include:: ../../bigquery_storage/README.rst + +API Reference +------------- +.. toctree:: + :maxdepth: 2 + + gapic/v1beta1/api + gapic/v1beta1/reader + gapic/v1beta1/types diff --git a/bigquery_storage/google/__init__.py b/bigquery_storage/google/__init__.py new file mode 100644 index 000000000000..f65701dd143f --- /dev/null +++ b/bigquery_storage/google/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +try: + import pkg_resources + pkg_resources.declare_namespace(__name__) +except ImportError: + import pkgutil + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/bigquery_storage/google/cloud/__init__.py b/bigquery_storage/google/cloud/__init__.py new file mode 100644 index 000000000000..f65701dd143f --- /dev/null +++ b/bigquery_storage/google/cloud/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import pkg_resources + pkg_resources.declare_namespace(__name__) +except ImportError: + import pkgutil + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/bigquery_storage/google/cloud/bigquery_storage.py b/bigquery_storage/google/cloud/bigquery_storage.py new file mode 100644 index 000000000000..7270e2885319 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +from google.cloud.bigquery_storage_v1beta1 import BigQueryStorageClient +from google.cloud.bigquery_storage_v1beta1 import enums +from google.cloud.bigquery_storage_v1beta1 import types + +__all__ = ( + 'enums', + 'types', + 'BigQueryStorageClient', +) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/__init__.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/__init__.py new file mode 100644 index 000000000000..b71f61049f68 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/__init__.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import + +import pkg_resources +__version__ = pkg_resources.get_distribution('google-cloud-bigquery-storage').version # noqa + +from google.cloud.bigquery_storage_v1beta1 import types +from google.cloud.bigquery_storage_v1beta1 import client +from google.cloud.bigquery_storage_v1beta1.gapic import enums + + +class BigQueryStorageClient(client.BigQueryStorageClient): + __doc__ = client.BigQueryStorageClient.__doc__ + enums = enums + + +__all__ = ( + # google.cloud.bigquery_storage_v1beta1 + '__version__', + 'types', + + # google.cloud.bigquery_storage_v1beta1.client + 'BigQueryStorageClient', + + # google.cloud.bigquery_storage_v1beta1.gapic + 'enums', +) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/client.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/client.py new file mode 100644 index 000000000000..75a34c37346e --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/client.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Parent client for calling the Cloud BigQuery Storage API. + +This is the base from which all interactions with the API occur. +""" + +from __future__ import absolute_import + +import google.api_core.gapic_v1.method + +from google.cloud.bigquery_storage_v1beta1 import reader +from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client # noqa + + +_SCOPES = ( + 'https://www.googleapis.com/auth/bigquery', + 'https://www.googleapis.com/auth/cloud-platform', +) + + +class BigQueryStorageClient(big_query_storage_client.BigQueryStorageClient): + """Client for interacting with BigQuery Storage API. + + The BigQuery storage API can be used to read data stored in BigQuery. + """ + + def read_rows(self, + read_position, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Reads rows from the table in the format prescribed by the read + session. Each response contains one or more table rows, up to a + maximum of 10 MiB per response; read requests which attempt to read + individual rows larger than this will fail. + + Each request also returns a set of stream statistics reflecting the + estimated total number of rows in the read stream. This number is + computed based on the total table size and the number of active + streams in the read session, and may change as other streams continue + to read data. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize ``table_reference``: + >>> table_reference = { + ... 'project_id': 'your-data-project-id', + ... 'dataset_id': 'your_dataset_id', + ... 'table_id': 'your_table_id', + ... 
} + >>> + >>> # TODO: Initialize `parent`: + >>> parent = 'projects/your-billing-project-id' + >>> + >>> session = client.create_read_session(table_reference, parent) + >>> read_position = bigquery_storage_v1beta1.types.StreamPosition( + ... stream=session.streams[0], # TODO: Read the other streams. + ... ) + >>> + >>> for element in client.read_rows(read_position): + ... # process element + ... pass + + Args: + read_position (Union[ \ + dict, \ + ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition \ + ]): + Required. Identifier of the position in the stream to start + reading from. The offset requested must be less than the last + row read from ReadRows. Requesting a larger offset is + undefined. If a dict is provided, it must be of the same form + as the protobuf message + :class:`~google.cloud.bigquery_storage_v1beta1.types.StreamPosition` + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Returns: + ~google.cloud.bigquery_storage_v1beta1.reader.ReadRowsStream: + An iterable of + :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse`. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + gapic_client = super(BigQueryStorageClient, self) + stream = gapic_client.read_rows( + read_position, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + return reader.ReadRowsStream( + stream, + gapic_client, + read_position, + { + 'retry': retry, + 'timeout': timeout, + 'metadata': metadata, + }, + ) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/__init__.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py new file mode 100644 index 000000000000..5acd74320b5f --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py @@ -0,0 +1,610 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Accesses the google.cloud.bigquery.storage.v1beta1 BigQueryStorage API.""" + +import pkg_resources +import warnings + +from google.oauth2 import service_account +import google.api_core.gapic_v1.client_info +import google.api_core.gapic_v1.config +import google.api_core.gapic_v1.method +import google.api_core.path_template +import google.api_core.gapic_v1.routing_header +import google.api_core.grpc_helpers +import grpc + +from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client_config +from google.cloud.bigquery_storage_v1beta1.gapic import enums +from google.cloud.bigquery_storage_v1beta1.gapic.transports import big_query_storage_grpc_transport +from google.cloud.bigquery_storage_v1beta1.proto import read_options_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2_grpc +from google.cloud.bigquery_storage_v1beta1.proto import table_reference_pb2 +from google.protobuf import empty_pb2 + +_GAPIC_LIBRARY_VERSION = pkg_resources.get_distribution( + 'google-cloud-bigquery-storage', ).version + + +class BigQueryStorageClient(object): + """ + BigQuery storage API. + + The BigQuery storage API can be used to read data stored in BigQuery. + """ + + SERVICE_ADDRESS = 'bigquerystorage.googleapis.com:443' + """The default address of the service.""" + + # The name of the interface for this client. This is the key used to + # find the method configuration in the client_config dictionary. + _INTERFACE_NAME = 'google.cloud.bigquery.storage.v1beta1.BigQueryStorage' + + @classmethod + def from_service_account_file(cls, filename, *args, **kwargs): + """Creates an instance of this client using the provided credentials + file. + + Args: + filename (str): The path to the service account private key json + file. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + BigQueryStorageClient: The constructed client. + """ + credentials = service_account.Credentials.from_service_account_file( + filename) + kwargs['credentials'] = credentials + return cls(*args, **kwargs) + + from_service_account_json = from_service_account_file + + def __init__(self, + transport=None, + channel=None, + credentials=None, + client_config=big_query_storage_client_config.config, + client_info=None): + """Constructor. + + Args: + transport (Union[~.BigQueryStorageGrpcTransport, + Callable[[~.Credentials, type], ~.BigQueryStorageGrpcTransport]): A transport + instance, responsible for actually making the API calls. + The default transport uses the gRPC protocol. + This argument may also be a callable which returns a + transport instance. Callables will be sent the credentials + as the first argument and the default transport class as + the second argument. + channel (grpc.Channel): DEPRECATED. A ``Channel`` instance + through which to make calls. This argument is mutually exclusive + with ``credentials``; providing both will raise an exception. + credentials (google.auth.credentials.Credentials): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is mutually exclusive with providing a + transport instance to ``transport``; doing so will raise + an exception. + client_config (dict): DEPRECATED. A dictionary of call options for + each method. 
If not specified, the default configuration is used. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + """ + # Raise deprecation warnings for things we want to go away. + if client_config: + warnings.warn('The `client_config` argument is deprecated.', + PendingDeprecationWarning) + if channel: + warnings.warn( + 'The `channel` argument is deprecated; use ' + '`transport` instead.', PendingDeprecationWarning) + + # Instantiate the transport. + # The transport is responsible for handling serialization and + # deserialization and actually sending data to the service. + if transport: # pragma: no cover + if callable(transport): + self.transport = transport( + credentials=credentials, + default_class=big_query_storage_grpc_transport. + BigQueryStorageGrpcTransport, + ) + else: + if credentials: + raise ValueError( + 'Received both a transport instance and ' + 'credentials; these are mutually exclusive.') + self.transport = transport + else: + self.transport = big_query_storage_grpc_transport.BigQueryStorageGrpcTransport( + address=self.SERVICE_ADDRESS, + channel=channel, + credentials=credentials, + ) + + if client_info is None: + client_info = google.api_core.gapic_v1.client_info.ClientInfo( + gapic_version=_GAPIC_LIBRARY_VERSION, ) + else: + client_info.gapic_version = _GAPIC_LIBRARY_VERSION + self._client_info = client_info + + # Parse out the default settings for retry and timeout for each RPC + # from the client configuration. + # (Ordinarily, these are the defaults specified in the `*_config.py` + # file next to this one.) + self._method_configs = google.api_core.gapic_v1.config.parse_method_configs( + client_config['interfaces'][self._INTERFACE_NAME], ) + + # Save a dictionary of cached API call functions. + # These are the actual callables which invoke the proper + # transport methods, wrapped with `wrap_method` to add retry, + # timeout, and the like. + self._inner_api_calls = {} + + # Service calls + def create_read_session(self, + table_reference, + parent, + table_modifiers=None, + requested_streams=None, + read_options=None, + format_=None, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Creates a new read session. A read session divides the contents of a + BigQuery table into one or more streams, which can then be used to read + data from the table. The read session also specifies properties of the + data to be read, such as a list of columns or a push-down filter describing + the rows to be returned. + + A particular row can be read by at most one stream. When the caller has + reached the end of each stream in the session, then all the data in the + table has been read. + + Read sessions automatically expire 24 hours after they are created and do + not require manual clean-up by the caller. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize `table_reference`: + >>> table_reference = {} + >>> + >>> # TODO: Initialize `parent`: + >>> parent = '' + >>> + >>> response = client.create_read_session(table_reference, parent) + + Args: + table_reference (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReference]): Required. Reference to the table to read. 
+ + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReference` + parent (str): Required. String of the form "projects/your-project-id" indicating the + project this ReadSession is associated with. This is the project that will + be billed for usage. + table_modifiers (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableModifiers]): Optional. Any modifiers to the Table (e.g. snapshot timestamp). + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableModifiers` + requested_streams (int): Optional. Initial number of streams. If unset or 0, we will + provide a value of streams so as to produce reasonable throughput. Must be + non-negative. The number of streams may be lower than the requested number, + depending on the amount parallelism that is reasonable for the table and + the maximum amount of parallelism allowed by the system. + + Streams must be read starting from offset 0. + read_options (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions]): Optional. Read options for this session (e.g. column selection, filters). + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions` + format_ (~google.cloud.bigquery_storage_v1beta1.types.DataFormat): Data output format. Currently default to Avro. + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Returns: + A :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadSession` instance. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + # Wrap the transport method to add retry and timeout logic. + if 'create_read_session' not in self._inner_api_calls: + self._inner_api_calls[ + 'create_read_session'] = google.api_core.gapic_v1.method.wrap_method( + self.transport.create_read_session, + default_retry=self._method_configs['CreateReadSession']. + retry, + default_timeout=self._method_configs['CreateReadSession']. 
+ timeout, + client_info=self._client_info, + ) + + request = storage_pb2.CreateReadSessionRequest( + table_reference=table_reference, + parent=parent, + table_modifiers=table_modifiers, + requested_streams=requested_streams, + read_options=read_options, + format=format_, + ) + if metadata is None: + metadata = [] + metadata = list(metadata) + try: + routing_header = [('table_reference.project_id', + table_reference.project_id), + ('table_reference.dataset_id', + table_reference.dataset_id)] + except AttributeError: + pass + else: + routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( # pragma: no cover + routing_header) + metadata.append(routing_metadata) # pragma: no cover + + return self._inner_api_calls['create_read_session']( + request, retry=retry, timeout=timeout, metadata=metadata) + + def read_rows(self, + read_position, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Reads rows from the table in the format prescribed by the read session. + Each response contains one or more table rows, up to a maximum of 10 MiB + per response; read requests which attempt to read individual rows larger + than this will fail. + + Each request also returns a set of stream statistics reflecting the + estimated total number of rows in the read stream. This number is computed + based on the total table size and the number of active streams in the read + session, and may change as other streams continue to read data. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize `read_position`: + >>> read_position = {} + >>> + >>> for element in client.read_rows(read_position): + ... # process element + ... pass + + Args: + read_position (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition]): Required. Identifier of the position in the stream to start reading from. + The offset requested must be less than the last row read from ReadRows. + Requesting a larger offset is undefined. + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.StreamPosition` + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Returns: + Iterable[~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse]. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + # Wrap the transport method to add retry and timeout logic. 
+ if 'read_rows' not in self._inner_api_calls: + self._inner_api_calls[ + 'read_rows'] = google.api_core.gapic_v1.method.wrap_method( + self.transport.read_rows, + default_retry=self._method_configs['ReadRows'].retry, + default_timeout=self._method_configs['ReadRows'].timeout, + client_info=self._client_info, + ) + + request = storage_pb2.ReadRowsRequest(read_position=read_position, ) + if metadata is None: + metadata = [] + metadata = list(metadata) + try: + routing_header = [('read_position.stream.name', + read_position.stream.name)] + except AttributeError: + pass + else: + routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( # pragma: no cover + routing_header) + metadata.append(routing_metadata) # pragma: no cover + + return self._inner_api_calls['read_rows']( + request, retry=retry, timeout=timeout, metadata=metadata) + + def batch_create_read_session_streams( + self, + session, + requested_streams, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Creates additional streams for a ReadSession. This API can be used to + dynamically adjust the parallelism of a batch processing task upwards by + adding additional workers. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize `session`: + >>> session = {} + >>> + >>> # TODO: Initialize `requested_streams`: + >>> requested_streams = 0 + >>> + >>> response = client.batch_create_read_session_streams(session, requested_streams) + + Args: + session (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.ReadSession]): Required. Must be a non-expired session obtained from a call to + CreateReadSession. Only the name field needs to be set. + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadSession` + requested_streams (int): Required. Number of new streams requested. Must be positive. + Number of added streams may be less than this, see CreateReadSessionRequest + for more information. + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Returns: + A :class:`~google.cloud.bigquery_storage_v1beta1.types.BatchCreateReadSessionStreamsResponse` instance. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + # Wrap the transport method to add retry and timeout logic. + if 'batch_create_read_session_streams' not in self._inner_api_calls: + self._inner_api_calls[ + 'batch_create_read_session_streams'] = google.api_core.gapic_v1.method.wrap_method( + self.transport.batch_create_read_session_streams, + default_retry=self. + _method_configs['BatchCreateReadSessionStreams'].retry, + default_timeout=self. 
+ _method_configs['BatchCreateReadSessionStreams'].timeout, + client_info=self._client_info, + ) + + request = storage_pb2.BatchCreateReadSessionStreamsRequest( + session=session, + requested_streams=requested_streams, + ) + if metadata is None: + metadata = [] + metadata = list(metadata) + try: + routing_header = [('session.name', session.name)] + except AttributeError: + pass + else: + routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( # pragma: no cover + routing_header) + metadata.append(routing_metadata) # pragma: no cover + + return self._inner_api_calls['batch_create_read_session_streams']( + request, retry=retry, timeout=timeout, metadata=metadata) + + def finalize_stream(self, + stream, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Triggers the graceful termination of a single stream in a ReadSession. This + API can be used to dynamically adjust the parallelism of a batch processing + task downwards without losing data. + + This API does not delete the stream -- it remains visible in the + ReadSession, and any data processed by the stream is not released to other + streams. However, no additional data will be assigned to the stream once + this call completes. Callers must continue reading data on the stream until + the end of the stream is reached so that data which has already been + assigned to the stream will be processed. + + This method will return an error if there are no other live streams + in the Session, or if SplitReadStream() has been called on the given + Stream. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize `stream`: + >>> stream = {} + >>> + >>> client.finalize_stream(stream) + + Args: + stream (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.Stream]): Stream to finalize. + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.Stream` + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + # Wrap the transport method to add retry and timeout logic. + if 'finalize_stream' not in self._inner_api_calls: + self._inner_api_calls[ + 'finalize_stream'] = google.api_core.gapic_v1.method.wrap_method( + self.transport.finalize_stream, + default_retry=self._method_configs['FinalizeStream'].retry, + default_timeout=self._method_configs['FinalizeStream']. 
+ timeout, + client_info=self._client_info, + ) + + request = storage_pb2.FinalizeStreamRequest(stream=stream, ) + if metadata is None: + metadata = [] + metadata = list(metadata) + try: + routing_header = [('stream.name', stream.name)] + except AttributeError: + pass + else: + routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( # pragma: no cover + routing_header) + metadata.append(routing_metadata) # pragma: no cover + + self._inner_api_calls['finalize_stream']( + request, retry=retry, timeout=timeout, metadata=metadata) + + def split_read_stream(self, + original_stream, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None): + """ + Splits a given read stream into two Streams. These streams are referred + to as the primary and the residual of the split. The original stream can + still be read from in the same manner as before. Both of the returned + streams can also be read from, and the total rows return by both child + streams will be the same as the rows read from the original stream. + + Moreover, the two child streams will be allocated back to back in the + original Stream. Concretely, it is guaranteed that for streams Original, + Primary, and Residual, that Original[0-j] = Primary[0-j] and + Original[j-n] = Residual[0-m] once the streams have been read to + completion. + + This method is guaranteed to be idempotent. + + Example: + >>> from google.cloud import bigquery_storage_v1beta1 + >>> + >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() + >>> + >>> # TODO: Initialize `original_stream`: + >>> original_stream = {} + >>> + >>> response = client.split_read_stream(original_stream) + + Args: + original_stream (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.Stream]): Stream to split. + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.bigquery_storage_v1beta1.types.Stream` + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Returns: + A :class:`~google.cloud.bigquery_storage_v1beta1.types.SplitReadStreamResponse` instance. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + # Wrap the transport method to add retry and timeout logic. + if 'split_read_stream' not in self._inner_api_calls: + self._inner_api_calls[ + 'split_read_stream'] = google.api_core.gapic_v1.method.wrap_method( + self.transport.split_read_stream, + default_retry=self._method_configs['SplitReadStream']. + retry, + default_timeout=self._method_configs['SplitReadStream']. 
+ timeout, + client_info=self._client_info, + ) + + request = storage_pb2.SplitReadStreamRequest( + original_stream=original_stream, ) + if metadata is None: + metadata = [] + metadata = list(metadata) + try: + routing_header = [('original_stream.name', original_stream.name)] + except AttributeError: + pass + else: + routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( # pragma: no cover + routing_header) + metadata.append(routing_metadata) # pragma: no cover + + return self._inner_api_calls['split_read_stream']( + request, retry=retry, timeout=timeout, metadata=metadata) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client_config.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client_config.py new file mode 100644 index 000000000000..d6357097836f --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client_config.py @@ -0,0 +1,48 @@ +config = { + "interfaces": { + "google.cloud.bigquery.storage.v1beta1.BigQueryStorage": { + "retry_codes": { + "idempotent": ["DEADLINE_EXCEEDED", "UNAVAILABLE"], + "non_idempotent": [] + }, + "retry_params": { + "default": { + "initial_retry_delay_millis": 100, + "retry_delay_multiplier": 1.3, + "max_retry_delay_millis": 60000, + "initial_rpc_timeout_millis": 20000, + "rpc_timeout_multiplier": 1.0, + "max_rpc_timeout_millis": 20000, + "total_timeout_millis": 600000 + } + }, + "methods": { + "CreateReadSession": { + "timeout_millis": 60000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + }, + "ReadRows": { + "timeout_millis": 86400000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + }, + "BatchCreateReadSessionStreams": { + "timeout_millis": 60000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + }, + "FinalizeStream": { + "timeout_millis": 60000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + }, + "SplitReadStream": { + "timeout_millis": 60000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + } + } + } + } +} diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/enums.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/enums.py new file mode 100644 index 000000000000..fc6e52d2e6fa --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/enums.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Wrappers for protocol buffer enum types.""" + +import enum + + +class DataFormat(enum.IntEnum): + """ + Data format for input or output data. + + Attributes: + DATA_FORMAT_UNSPECIFIED (int): Data format is unspecified. + AVRO (int): Avro is a standard open source row based file format. + See https://avro.apache.org/ for more details. 
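The ``big_query_storage_client_config.py`` dictionary above is what ``wrap_method`` consults for per-RPC retry and timeout defaults. As a rough illustration of what the ``default`` retry params amount to in ``google.api_core`` terms (an approximation, not the actual wiring in the generated client):

# Approximate equivalent of the "default" retry_params above, expressed as a
# google.api_core Retry object.  This is an illustration only.
from google.api_core import exceptions, retry

idempotent_retry = retry.Retry(
    predicate=retry.if_exception_type(
        exceptions.DeadlineExceeded,    # DEADLINE_EXCEEDED
        exceptions.ServiceUnavailable,  # UNAVAILABLE
    ),
    initial=0.1,     # initial_retry_delay_millis = 100
    multiplier=1.3,  # retry_delay_multiplier
    maximum=60.0,    # max_retry_delay_millis = 60000
    deadline=600.0,  # total_timeout_millis = 600000
)

The per-method ``timeout_millis`` values (for example, 86400000 for ``ReadRows``) are applied separately as the default timeout for each wrapped call.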
+ """ + DATA_FORMAT_UNSPECIFIED = 0 + AVRO = 1 diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/__init__.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py new file mode 100644 index 000000000000..e5d4483b157e --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import google.api_core.grpc_helpers + +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2_grpc + + +class BigQueryStorageGrpcTransport(object): + """gRPC transport class providing stubs for + google.cloud.bigquery.storage.v1beta1 BigQueryStorage API. + + The transport provides access to the raw gRPC stubs, + which can be used to take advantage of advanced + features of gRPC. + """ + # The scopes needed to make gRPC calls to all of the methods defined + # in this service. + _OAUTH_SCOPES = ( + 'https://www.googleapis.com/auth/bigquery', + 'https://www.googleapis.com/auth/cloud-platform', + ) + + def __init__(self, + channel=None, + credentials=None, + address='bigquerystorage.googleapis.com:443'): + """Instantiate the transport class. + + Args: + channel (grpc.Channel): A ``Channel`` instance through + which to make calls. This argument is mutually exclusive + with ``credentials``; providing both will raise an exception. + credentials (google.auth.credentials.Credentials): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If none + are specified, the client will attempt to ascertain the + credentials from the environment. + address (str): The address where the service is hosted. + """ + # If both `channel` and `credentials` are specified, raise an + # exception (channels come with credentials baked in already). + if channel is not None and credentials is not None: # pragma: no cover + raise ValueError( + 'The `channel` and `credentials` arguments are mutually ' + 'exclusive.', ) + + # Create the channel. + if channel is None: # pragma: no cover + channel = self.create_channel( + address=address, + credentials=credentials, + ) + + # gRPC uses objects called "stubs" that are bound to the + # channel and provide a basic method for each RPC. + self._stubs = { + 'big_query_storage_stub': + storage_pb2_grpc.BigQueryStorageStub(channel), + } + + @classmethod + def create_channel(cls, + address='bigquerystorage.googleapis.com:443', + credentials=None): + """Create and return a gRPC channel object. + + Args: + address (str): The host for the channel to use. 
+ credentials (~.Credentials): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. + + Returns: + grpc.Channel: A gRPC channel object. + """ + return google.api_core.grpc_helpers.create_channel( # pragma: no cover + address, + credentials=credentials, + scopes=cls._OAUTH_SCOPES, + ) + + @property + def create_read_session(self): + """Return the gRPC stub for {$apiMethod.name}. + + Creates a new read session. A read session divides the contents of a + BigQuery table into one or more streams, which can then be used to read + data from the table. The read session also specifies properties of the + data to be read, such as a list of columns or a push-down filter describing + the rows to be returned. + + A particular row can be read by at most one stream. When the caller has + reached the end of each stream in the session, then all the data in the + table has been read. + + Read sessions automatically expire 24 hours after they are created and do + not require manual clean-up by the caller. + + Returns: + Callable: A callable which accepts the appropriate + deserialized request object and returns a + deserialized response object. + """ + return self._stubs['big_query_storage_stub'].CreateReadSession + + @property + def read_rows(self): + """Return the gRPC stub for {$apiMethod.name}. + + Reads rows from the table in the format prescribed by the read session. + Each response contains one or more table rows, up to a maximum of 10 MiB + per response; read requests which attempt to read individual rows larger + than this will fail. + + Each request also returns a set of stream statistics reflecting the + estimated total number of rows in the read stream. This number is computed + based on the total table size and the number of active streams in the read + session, and may change as other streams continue to read data. + + Returns: + Callable: A callable which accepts the appropriate + deserialized request object and returns a + deserialized response object. + """ + return self._stubs['big_query_storage_stub'].ReadRows + + @property + def batch_create_read_session_streams(self): + """Return the gRPC stub for {$apiMethod.name}. + + Creates additional streams for a ReadSession. This API can be used to + dynamically adjust the parallelism of a batch processing task upwards by + adding additional workers. + + Returns: + Callable: A callable which accepts the appropriate + deserialized request object and returns a + deserialized response object. + """ + return self._stubs[ + 'big_query_storage_stub'].BatchCreateReadSessionStreams + + @property + def finalize_stream(self): + """Return the gRPC stub for {$apiMethod.name}. + + Triggers the graceful termination of a single stream in a ReadSession. This + API can be used to dynamically adjust the parallelism of a batch processing + task downwards without losing data. + + This API does not delete the stream -- it remains visible in the + ReadSession, and any data processed by the stream is not released to other + streams. However, no additional data will be assigned to the stream once + this call completes. Callers must continue reading data on the stream until + the end of the stream is reached so that data which has already been + assigned to the stream will be processed. 
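For callers who want gRPC-level control, the transport above can be constructed around an explicitly created channel. A minimal sketch, assuming application default credentials are available in the environment; whether the higher-level client accepts such a transport is outside this excerpt, so the sketch stops at the transport itself.

# Minimal sketch: build the gRPC transport around an explicitly created
# channel.  Assumes application default credentials are available.
from google.cloud.bigquery_storage_v1beta1.gapic.transports import (
    big_query_storage_grpc_transport,
)

channel = big_query_storage_grpc_transport.BigQueryStorageGrpcTransport.create_channel(
    address='bigquerystorage.googleapis.com:443',
)
transport = big_query_storage_grpc_transport.BigQueryStorageGrpcTransport(
    channel=channel,
)
# The transport exposes one stub property per RPC, e.g. transport.create_read_session.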
+ + This method will return an error if there are no other live streams + in the Session, or if SplitReadStream() has been called on the given + Stream. + + Returns: + Callable: A callable which accepts the appropriate + deserialized request object and returns a + deserialized response object. + """ + return self._stubs['big_query_storage_stub'].FinalizeStream + + @property + def split_read_stream(self): + """Return the gRPC stub for {$apiMethod.name}. + + Splits a given read stream into two Streams. These streams are referred + to as the primary and the residual of the split. The original stream can + still be read from in the same manner as before. Both of the returned + streams can also be read from, and the total rows return by both child + streams will be the same as the rows read from the original stream. + + Moreover, the two child streams will be allocated back to back in the + original Stream. Concretely, it is guaranteed that for streams Original, + Primary, and Residual, that Original[0-j] = Primary[0-j] and + Original[j-n] = Residual[0-m] once the streams have been read to + completion. + + This method is guaranteed to be idempotent. + + Returns: + Callable: A callable which accepts the appropriate + deserialized request object and returns a + deserialized response object. + """ + return self._stubs['big_query_storage_stub'].SplitReadStream diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/__init__.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2.py new file mode 100644 index 000000000000..10a029a9cf14 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2.py @@ -0,0 +1,136 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
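Together, ``batch_create_read_session_streams``, ``split_read_stream``, and ``finalize_stream`` let a batch job resize its worker pool mid-read. A rough sketch of that flow, assuming ``client`` and ``session`` were obtained as in the earlier example:

# Rough sketch of adjusting parallelism on an existing session.  Assumes
# `client` is a BigQueryStorageClient and `session` is a ReadSession returned
# by create_read_session (see the earlier example).

# Scale up: ask the service for two more streams to hand to new workers.
new_streams = client.batch_create_read_session_streams(session, 2)

# Split a busy stream; the primary keeps the front of the original stream and
# the remainder can be assigned to another worker.
split = client.split_read_stream(session.streams[0])
primary, remainder = split.primary_stream, split.remainder_stream

# Scale down: finalize a stream so no more rows are assigned to it.  The
# worker holding it must still drain what was already assigned.
client.finalize_stream(new_streams.streams[0])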
+# source: google/cloud/bigquery/storage_v1beta1/proto/avro.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='google/cloud/bigquery/storage_v1beta1/proto/avro.proto', + package='google.cloud.bigquery.storage.v1beta1', + syntax='proto3', + serialized_pb=_b('\n6google/cloud/bigquery/storage_v1beta1/proto/avro.proto\x12%google.cloud.bigquery.storage.v1beta1\"\x1c\n\nAvroSchema\x12\x0e\n\x06schema\x18\x01 \x01(\t\"=\n\x08\x41vroRows\x12\x1e\n\x16serialized_binary_rows\x18\x01 \x01(\x0c\x12\x11\n\trow_count\x18\x02 \x01(\x03\x42\x84\x01\n)com.google.cloud.bigquery.storage.v1beta1B\tAvroProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3') +) + + + + +_AVROSCHEMA = _descriptor.Descriptor( + name='AvroSchema', + full_name='google.cloud.bigquery.storage.v1beta1.AvroSchema', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='schema', full_name='google.cloud.bigquery.storage.v1beta1.AvroSchema.schema', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=97, + serialized_end=125, +) + + +_AVROROWS = _descriptor.Descriptor( + name='AvroRows', + full_name='google.cloud.bigquery.storage.v1beta1.AvroRows', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='serialized_binary_rows', full_name='google.cloud.bigquery.storage.v1beta1.AvroRows.serialized_binary_rows', index=0, + number=1, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='row_count', full_name='google.cloud.bigquery.storage.v1beta1.AvroRows.row_count', index=1, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=127, + serialized_end=188, +) + +DESCRIPTOR.message_types_by_name['AvroSchema'] = _AVROSCHEMA +DESCRIPTOR.message_types_by_name['AvroRows'] = _AVROROWS +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +AvroSchema = _reflection.GeneratedProtocolMessageType('AvroSchema', (_message.Message,), dict( + DESCRIPTOR = _AVROSCHEMA, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.avro_pb2' + , + __doc__ = """Avro schema. 
+ + + Attributes: + schema: + Json serialized schema, as described at + https://avro.apache.org/docs/1.8.1/spec.html + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.AvroSchema) + )) +_sym_db.RegisterMessage(AvroSchema) + +AvroRows = _reflection.GeneratedProtocolMessageType('AvroRows', (_message.Message,), dict( + DESCRIPTOR = _AVROROWS, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.avro_pb2' + , + __doc__ = """Avro rows. + + + Attributes: + serialized_binary_rows: + Binary serialized rows in a block. + row_count: + The count of rows in the returning block. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.AvroRows) + )) +_sym_db.RegisterMessage(AvroRows) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n)com.google.cloud.bigquery.storage.v1beta1B\tAvroProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage')) +# @@protoc_insertion_point(module_scope) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2_grpc.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2_grpc.py new file mode 100644 index 000000000000..a89435267cb2 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2_grpc.py @@ -0,0 +1,3 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc + diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2.py new file mode 100644 index 000000000000..7a01b7593c51 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2.py @@ -0,0 +1,96 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
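The ``AvroSchema`` and ``AvroRows`` messages above only carry bytes; decoding is left to the caller. One way to turn a row block into Python dicts, assuming the third-party ``fastavro`` package is installed (this helper is illustrative and not part of the generated code):

# Illustrative helper, assuming the third-party `fastavro` package.  It decodes
# one AvroRows block using the session's AvroSchema (a JSON Avro schema string).
import io
import json

import fastavro


def avro_rows_to_dicts(avro_schema, avro_rows):
    parsed_schema = fastavro.parse_schema(json.loads(avro_schema.schema))
    block = io.BytesIO(avro_rows.serialized_binary_rows)
    return [
        fastavro.schemaless_reader(block, parsed_schema)
        for _ in range(avro_rows.row_count)
    ]

A caller would typically pass ``session.avro_schema`` and ``response.avro_rows`` from the read loop shown earlier.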
+# source: google/cloud/bigquery/storage_v1beta1/proto/read_options.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='google/cloud/bigquery/storage_v1beta1/proto/read_options.proto', + package='google.cloud.bigquery.storage.v1beta1', + syntax='proto3', + serialized_pb=_b('\n>google/cloud/bigquery/storage_v1beta1/proto/read_options.proto\x12%google.cloud.bigquery.storage.v1beta1\"D\n\x10TableReadOptions\x12\x17\n\x0fselected_fields\x18\x01 \x03(\t\x12\x17\n\x0frow_restriction\x18\x02 \x01(\tBy\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3') +) + + + + +_TABLEREADOPTIONS = _descriptor.Descriptor( + name='TableReadOptions', + full_name='google.cloud.bigquery.storage.v1beta1.TableReadOptions', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='selected_fields', full_name='google.cloud.bigquery.storage.v1beta1.TableReadOptions.selected_fields', index=0, + number=1, type=9, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='row_restriction', full_name='google.cloud.bigquery.storage.v1beta1.TableReadOptions.row_restriction', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=105, + serialized_end=173, +) + +DESCRIPTOR.message_types_by_name['TableReadOptions'] = _TABLEREADOPTIONS +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +TableReadOptions = _reflection.GeneratedProtocolMessageType('TableReadOptions', (_message.Message,), dict( + DESCRIPTOR = _TABLEREADOPTIONS, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.read_options_pb2' + , + __doc__ = """Options dictating how we read a table. + + + Attributes: + selected_fields: + Optional. Names of the fields in the table that should be + read. If empty, all fields will be read. If the specified + field is a nested field, all the sub-fields in the field will + be selected. The output field order is unrelated to the order + of fields in selected\_fields. + row_restriction: + Optional. SQL text filtering statement, similar to a WHERE + clause in a query. Currently, we support combinations of + predicates that are a comparison between a column and a + constant value in SQL statement. Aggregates are not supported. 
+ Example: "a > DATE '2014-9-27' AND (b > 5 and C LIKE 'date')" + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.TableReadOptions) + )) +_sym_db.RegisterMessage(TableReadOptions) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage')) +# @@protoc_insertion_point(module_scope) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2_grpc.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2_grpc.py new file mode 100644 index 000000000000..a89435267cb2 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2_grpc.py @@ -0,0 +1,3 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc + diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py new file mode 100644 index 000000000000..4a11ee41c446 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py @@ -0,0 +1,950 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: google/cloud/bigquery/storage_v1beta1/proto/storage.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf.internal import enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.cloud.bigquery_storage_v1beta1.proto import avro_pb2 as google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_avro__pb2 +from google.cloud.bigquery_storage_v1beta1.proto import read_options_pb2 as google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_read__options__pb2 +from google.cloud.bigquery_storage_v1beta1.proto import table_reference_pb2 as google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2 +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='google/cloud/bigquery/storage_v1beta1/proto/storage.proto', + package='google.cloud.bigquery.storage.v1beta1', + syntax='proto3', + serialized_pb=_b('\n9google/cloud/bigquery/storage_v1beta1/proto/storage.proto\x12%google.cloud.bigquery.storage.v1beta1\x1a\x36google/cloud/bigquery/storage_v1beta1/proto/avro.proto\x1a>google/cloud/bigquery/storage_v1beta1/proto/read_options.proto\x1a\x41google/cloud/bigquery/storage_v1beta1/proto/table_reference.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\")\n\x06Stream\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x11\n\trow_count\x18\x02 \x01(\x03\"_\n\x0eStreamPosition\x12=\n\x06stream\x18\x01 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\x12\x0e\n\x06offset\x18\x02 \x01(\x03\"\x80\x03\n\x0bReadSession\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\x0b\x65xpire_time\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12H\n\x0b\x61vro_schema\x18\x05 
\x01(\x0b\x32\x31.google.cloud.bigquery.storage.v1beta1.AvroSchemaH\x00\x12>\n\x07streams\x18\x04 \x03(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\x12N\n\x0ftable_reference\x18\x07 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableReference\x12N\n\x0ftable_modifiers\x18\x08 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableModifiersB\x08\n\x06schema\"\xf7\x02\n\x18\x43reateReadSessionRequest\x12N\n\x0ftable_reference\x18\x01 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableReference\x12\x0e\n\x06parent\x18\x06 \x01(\t\x12N\n\x0ftable_modifiers\x18\x02 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableModifiers\x12\x19\n\x11requested_streams\x18\x03 \x01(\x05\x12M\n\x0cread_options\x18\x04 \x01(\x0b\x32\x37.google.cloud.bigquery.storage.v1beta1.TableReadOptions\x12\x41\n\x06\x66ormat\x18\x05 \x01(\x0e\x32\x31.google.cloud.bigquery.storage.v1beta1.DataFormat\"_\n\x0fReadRowsRequest\x12L\n\rread_position\x18\x01 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.StreamPosition\"+\n\x0cStreamStatus\x12\x1b\n\x13\x65stimated_row_count\x18\x01 \x01(\x03\"*\n\x0eThrottleStatus\x12\x18\n\x10throttle_percent\x18\x01 \x01(\x05\"\xf5\x01\n\x10ReadRowsResponse\x12\x44\n\tavro_rows\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.storage.v1beta1.AvroRowsH\x00\x12\x43\n\x06status\x18\x02 \x01(\x0b\x32\x33.google.cloud.bigquery.storage.v1beta1.StreamStatus\x12N\n\x0fthrottle_status\x18\x05 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.ThrottleStatusB\x06\n\x04rows\"\x86\x01\n$BatchCreateReadSessionStreamsRequest\x12\x43\n\x07session\x18\x01 \x01(\x0b\x32\x32.google.cloud.bigquery.storage.v1beta1.ReadSession\x12\x19\n\x11requested_streams\x18\x02 \x01(\x05\"g\n%BatchCreateReadSessionStreamsResponse\x12>\n\x07streams\x18\x01 \x03(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\"V\n\x15\x46inalizeStreamRequest\x12=\n\x06stream\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\"`\n\x16SplitReadStreamRequest\x12\x46\n\x0foriginal_stream\x18\x01 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\"\xa9\x01\n\x17SplitReadStreamResponse\x12\x45\n\x0eprimary_stream\x18\x01 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\x12G\n\x10remainder_stream\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream*3\n\nDataFormat\x12\x1b\n\x17\x44\x41TA_FORMAT_UNSPECIFIED\x10\x00\x12\x08\n\x04\x41VRO\x10\x01\x32\xdd\x05\n\x0f\x42igQueryStorage\x12\x8a\x01\n\x11\x43reateReadSession\x12?.google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest\x1a\x32.google.cloud.bigquery.storage.v1beta1.ReadSession\"\x00\x12\x7f\n\x08ReadRows\x12\x36.google.cloud.bigquery.storage.v1beta1.ReadRowsRequest\x1a\x37.google.cloud.bigquery.storage.v1beta1.ReadRowsResponse\"\x00\x30\x01\x12\xbc\x01\n\x1d\x42\x61tchCreateReadSessionStreams\x12K.google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest\x1aL.google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse\"\x00\x12h\n\x0e\x46inalizeStream\x12<.google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest\x1a\x16.google.protobuf.Empty\"\x00\x12\x92\x01\n\x0fSplitReadStream\x12=.google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest\x1a>.google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse\"\x00\x42y\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3') + , + 
dependencies=[google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_avro__pb2.DESCRIPTOR,google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_read__options__pb2.DESCRIPTOR,google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2.DESCRIPTOR,google_dot_protobuf_dot_empty__pb2.DESCRIPTOR,google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR,]) + +_DATAFORMAT = _descriptor.EnumDescriptor( + name='DataFormat', + full_name='google.cloud.bigquery.storage.v1beta1.DataFormat', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='DATA_FORMAT_UNSPECIFIED', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='AVRO', index=1, number=1, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=2288, + serialized_end=2339, +) +_sym_db.RegisterEnumDescriptor(_DATAFORMAT) + +DataFormat = enum_type_wrapper.EnumTypeWrapper(_DATAFORMAT) +DATA_FORMAT_UNSPECIFIED = 0 +AVRO = 1 + + + +_STREAM = _descriptor.Descriptor( + name='Stream', + full_name='google.cloud.bigquery.storage.v1beta1.Stream', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', full_name='google.cloud.bigquery.storage.v1beta1.Stream.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='row_count', full_name='google.cloud.bigquery.storage.v1beta1.Stream.row_count', index=1, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=349, + serialized_end=390, +) + + +_STREAMPOSITION = _descriptor.Descriptor( + name='StreamPosition', + full_name='google.cloud.bigquery.storage.v1beta1.StreamPosition', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='stream', full_name='google.cloud.bigquery.storage.v1beta1.StreamPosition.stream', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='offset', full_name='google.cloud.bigquery.storage.v1beta1.StreamPosition.offset', index=1, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=392, + serialized_end=487, +) + + +_READSESSION = _descriptor.Descriptor( + name='ReadSession', + full_name='google.cloud.bigquery.storage.v1beta1.ReadSession', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', 
full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='expire_time', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.expire_time', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='avro_schema', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.avro_schema', index=2, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='streams', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.streams', index=3, + number=4, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='table_reference', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.table_reference', index=4, + number=7, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='table_modifiers', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.table_modifiers', index=5, + number=8, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name='schema', full_name='google.cloud.bigquery.storage.v1beta1.ReadSession.schema', + index=0, containing_type=None, fields=[]), + ], + serialized_start=490, + serialized_end=874, +) + + +_CREATEREADSESSIONREQUEST = _descriptor.Descriptor( + name='CreateReadSessionRequest', + full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='table_reference', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.table_reference', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='parent', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.parent', index=1, + number=6, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + 
_descriptor.FieldDescriptor( + name='table_modifiers', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.table_modifiers', index=2, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='requested_streams', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.requested_streams', index=3, + number=3, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='read_options', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.read_options', index=4, + number=4, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='format', full_name='google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.format', index=5, + number=5, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=877, + serialized_end=1252, +) + + +_READROWSREQUEST = _descriptor.Descriptor( + name='ReadRowsRequest', + full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='read_position', full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsRequest.read_position', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1254, + serialized_end=1349, +) + + +_STREAMSTATUS = _descriptor.Descriptor( + name='StreamStatus', + full_name='google.cloud.bigquery.storage.v1beta1.StreamStatus', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='estimated_row_count', full_name='google.cloud.bigquery.storage.v1beta1.StreamStatus.estimated_row_count', index=0, + number=1, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1351, + serialized_end=1394, +) + + +_THROTTLESTATUS = _descriptor.Descriptor( + name='ThrottleStatus', + full_name='google.cloud.bigquery.storage.v1beta1.ThrottleStatus', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + 
name='throttle_percent', full_name='google.cloud.bigquery.storage.v1beta1.ThrottleStatus.throttle_percent', index=0, + number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1396, + serialized_end=1438, +) + + +_READROWSRESPONSE = _descriptor.Descriptor( + name='ReadRowsResponse', + full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='avro_rows', full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.avro_rows', index=0, + number=3, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='status', full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.status', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='throttle_status', full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.throttle_status', index=2, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name='rows', full_name='google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.rows', + index=0, containing_type=None, fields=[]), + ], + serialized_start=1441, + serialized_end=1686, +) + + +_BATCHCREATEREADSESSIONSTREAMSREQUEST = _descriptor.Descriptor( + name='BatchCreateReadSessionStreamsRequest', + full_name='google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='session', full_name='google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest.session', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='requested_streams', full_name='google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest.requested_streams', index=1, + number=2, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1689, + serialized_end=1823, +) + + +_BATCHCREATEREADSESSIONSTREAMSRESPONSE 
= _descriptor.Descriptor( + name='BatchCreateReadSessionStreamsResponse', + full_name='google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='streams', full_name='google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse.streams', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1825, + serialized_end=1928, +) + + +_FINALIZESTREAMREQUEST = _descriptor.Descriptor( + name='FinalizeStreamRequest', + full_name='google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='stream', full_name='google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest.stream', index=0, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1930, + serialized_end=2016, +) + + +_SPLITREADSTREAMREQUEST = _descriptor.Descriptor( + name='SplitReadStreamRequest', + full_name='google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='original_stream', full_name='google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest.original_stream', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2018, + serialized_end=2114, +) + + +_SPLITREADSTREAMRESPONSE = _descriptor.Descriptor( + name='SplitReadStreamResponse', + full_name='google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='primary_stream', full_name='google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse.primary_stream', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='remainder_stream', full_name='google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse.remainder_stream', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + 
is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2117, + serialized_end=2286, +) + +_STREAMPOSITION.fields_by_name['stream'].message_type = _STREAM +_READSESSION.fields_by_name['expire_time'].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP +_READSESSION.fields_by_name['avro_schema'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_avro__pb2._AVROSCHEMA +_READSESSION.fields_by_name['streams'].message_type = _STREAM +_READSESSION.fields_by_name['table_reference'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEREFERENCE +_READSESSION.fields_by_name['table_modifiers'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEMODIFIERS +_READSESSION.oneofs_by_name['schema'].fields.append( + _READSESSION.fields_by_name['avro_schema']) +_READSESSION.fields_by_name['avro_schema'].containing_oneof = _READSESSION.oneofs_by_name['schema'] +_CREATEREADSESSIONREQUEST.fields_by_name['table_reference'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEREFERENCE +_CREATEREADSESSIONREQUEST.fields_by_name['table_modifiers'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEMODIFIERS +_CREATEREADSESSIONREQUEST.fields_by_name['read_options'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_read__options__pb2._TABLEREADOPTIONS +_CREATEREADSESSIONREQUEST.fields_by_name['format'].enum_type = _DATAFORMAT +_READROWSREQUEST.fields_by_name['read_position'].message_type = _STREAMPOSITION +_READROWSRESPONSE.fields_by_name['avro_rows'].message_type = google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_avro__pb2._AVROROWS +_READROWSRESPONSE.fields_by_name['status'].message_type = _STREAMSTATUS +_READROWSRESPONSE.fields_by_name['throttle_status'].message_type = _THROTTLESTATUS +_READROWSRESPONSE.oneofs_by_name['rows'].fields.append( + _READROWSRESPONSE.fields_by_name['avro_rows']) +_READROWSRESPONSE.fields_by_name['avro_rows'].containing_oneof = _READROWSRESPONSE.oneofs_by_name['rows'] +_BATCHCREATEREADSESSIONSTREAMSREQUEST.fields_by_name['session'].message_type = _READSESSION +_BATCHCREATEREADSESSIONSTREAMSRESPONSE.fields_by_name['streams'].message_type = _STREAM +_FINALIZESTREAMREQUEST.fields_by_name['stream'].message_type = _STREAM +_SPLITREADSTREAMREQUEST.fields_by_name['original_stream'].message_type = _STREAM +_SPLITREADSTREAMRESPONSE.fields_by_name['primary_stream'].message_type = _STREAM +_SPLITREADSTREAMRESPONSE.fields_by_name['remainder_stream'].message_type = _STREAM +DESCRIPTOR.message_types_by_name['Stream'] = _STREAM +DESCRIPTOR.message_types_by_name['StreamPosition'] = _STREAMPOSITION +DESCRIPTOR.message_types_by_name['ReadSession'] = _READSESSION +DESCRIPTOR.message_types_by_name['CreateReadSessionRequest'] = _CREATEREADSESSIONREQUEST +DESCRIPTOR.message_types_by_name['ReadRowsRequest'] = _READROWSREQUEST +DESCRIPTOR.message_types_by_name['StreamStatus'] = _STREAMSTATUS +DESCRIPTOR.message_types_by_name['ThrottleStatus'] = _THROTTLESTATUS +DESCRIPTOR.message_types_by_name['ReadRowsResponse'] = _READROWSRESPONSE +DESCRIPTOR.message_types_by_name['BatchCreateReadSessionStreamsRequest'] = _BATCHCREATEREADSESSIONSTREAMSREQUEST +DESCRIPTOR.message_types_by_name['BatchCreateReadSessionStreamsResponse'] = _BATCHCREATEREADSESSIONSTREAMSRESPONSE 
+DESCRIPTOR.message_types_by_name['FinalizeStreamRequest'] = _FINALIZESTREAMREQUEST +DESCRIPTOR.message_types_by_name['SplitReadStreamRequest'] = _SPLITREADSTREAMREQUEST +DESCRIPTOR.message_types_by_name['SplitReadStreamResponse'] = _SPLITREADSTREAMRESPONSE +DESCRIPTOR.enum_types_by_name['DataFormat'] = _DATAFORMAT +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Stream = _reflection.GeneratedProtocolMessageType('Stream', (_message.Message,), dict( + DESCRIPTOR = _STREAM, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Information about a single data stream within a read session. + + + Attributes: + name: + Name of the stream. In the form + ``/projects/{project_id}/stream/{stream_id}`` + row_count: + Rows in the stream. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.Stream) + )) +_sym_db.RegisterMessage(Stream) + +StreamPosition = _reflection.GeneratedProtocolMessageType('StreamPosition', (_message.Message,), dict( + DESCRIPTOR = _STREAMPOSITION, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Expresses a point within a given stream using an offset position. + + + Attributes: + stream: + Identifier for a given Stream. + offset: + Position in the stream. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.StreamPosition) + )) +_sym_db.RegisterMessage(StreamPosition) + +ReadSession = _reflection.GeneratedProtocolMessageType('ReadSession', (_message.Message,), dict( + DESCRIPTOR = _READSESSION, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Information returned from a ``CreateReadSession`` request. + + + Attributes: + name: + Unique identifier for the session. In the form + ``projects/{project_id}/sessions/{session_id}`` + expire_time: + Time at which the session becomes invalid. After this time, + subsequent requests to read this Session will return errors. + schema: + The schema for the read. If read\_options.selected\_fields is + set, the schema may be different from the table schema as it + will only contain the selected fields. + avro_schema: + Avro schema. + streams: + Streams associated with this session. + table_reference: + Table that this ReadSession is reading from. + table_modifiers: + Any modifiers which are applied when reading from the + specified table. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ReadSession) + )) +_sym_db.RegisterMessage(ReadSession) + +CreateReadSessionRequest = _reflection.GeneratedProtocolMessageType('CreateReadSessionRequest', (_message.Message,), dict( + DESCRIPTOR = _CREATEREADSESSIONREQUEST, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Creates a new read session, which may include additional options such as + requested parallelism, projection filters and constraints. + + + Attributes: + table_reference: + Required. Reference to the table to read. + parent: + Required. String of the form "projects/your-project-id" + indicating the project this ReadSession is associated with. + This is the project that will be billed for usage. + table_modifiers: + Optional. Any modifiers to the Table (e.g. snapshot + timestamp). + requested_streams: + Optional. Initial number of streams. If unset or 0, we will + provide a value of streams so as to produce reasonable + throughput. Must be non-negative. 
The number of streams may be + lower than the requested number, depending on the amount + parallelism that is reasonable for the table and the maximum + amount of parallelism allowed by the system. Streams must be + read starting from offset 0. + read_options: + Optional. Read options for this session (e.g. column + selection, filters). + format: + Data output format. Currently default to Avro. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest) + )) +_sym_db.RegisterMessage(CreateReadSessionRequest) + +ReadRowsRequest = _reflection.GeneratedProtocolMessageType('ReadRowsRequest', (_message.Message,), dict( + DESCRIPTOR = _READROWSREQUEST, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Requesting row data via ``ReadRows`` must provide Stream position + information. + + + Attributes: + read_position: + Required. Identifier of the position in the stream to start + reading from. The offset requested must be less than the last + row read from ReadRows. Requesting a larger offset is + undefined. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ReadRowsRequest) + )) +_sym_db.RegisterMessage(ReadRowsRequest) + +StreamStatus = _reflection.GeneratedProtocolMessageType('StreamStatus', (_message.Message,), dict( + DESCRIPTOR = _STREAMSTATUS, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Progress information for a given Stream. + + + Attributes: + estimated_row_count: + Number of estimated rows in the current stream. May change + over time as different readers in the stream progress at rates + which are relatively fast or slow. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.StreamStatus) + )) +_sym_db.RegisterMessage(StreamStatus) + +ThrottleStatus = _reflection.GeneratedProtocolMessageType('ThrottleStatus', (_message.Message,), dict( + DESCRIPTOR = _THROTTLESTATUS, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Information on if the current connection is being throttled. + + + Attributes: + throttle_percent: + How much this connection is being throttled. 0 is no + throttling, 100 is completely throttled. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ThrottleStatus) + )) +_sym_db.RegisterMessage(ThrottleStatus) + +ReadRowsResponse = _reflection.GeneratedProtocolMessageType('ReadRowsResponse', (_message.Message,), dict( + DESCRIPTOR = _READROWSRESPONSE, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Response from calling ``ReadRows`` may include row data, progress and + throttling information. + + + Attributes: + rows: + Row data is returned in format specified during session + creation. + avro_rows: + Serialized row data in AVRO format. + status: + Estimated stream statistics. + throttle_status: + Throttling status. If unset, the latest response still + describes the current throttling status. 
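
The request and response messages above describe the read flow only in prose; the following sketch (not part of the patch) shows how they fit together through the hand-written client added in this PR, mirroring the system tests further down. The project and table names are placeholders, and application default credentials are assumed.

from google.cloud import bigquery_storage_v1beta1

client = bigquery_storage_v1beta1.BigQueryStorageClient()

# Table to read and the project billed for the read (placeholders).
table_ref = bigquery_storage_v1beta1.types.TableReference(
    project_id='bigquery-public-data',
    dataset_id='usa_names',
    table_id='usa_1910_2013',
)
session = client.create_read_session(
    table_ref, 'projects/my-billing-project', requested_streams=1)

# Streams are read from offset 0 via a StreamPosition.
position = bigquery_storage_v1beta1.types.StreamPosition(
    stream=session.streams[0])

for response in client.read_rows(position):
    # Each ReadRowsResponse block carries Avro-serialized rows plus stream
    # status and, when applicable, throttling information.
    print(response.avro_rows.row_count,
          response.status.estimated_row_count,
          response.throttle_status.throttle_percent)
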
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ReadRowsResponse) + )) +_sym_db.RegisterMessage(ReadRowsResponse) + +BatchCreateReadSessionStreamsRequest = _reflection.GeneratedProtocolMessageType('BatchCreateReadSessionStreamsRequest', (_message.Message,), dict( + DESCRIPTOR = _BATCHCREATEREADSESSIONSTREAMSREQUEST, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Information needed to request additional streams for an established read + session. + + + Attributes: + session: + Required. Must be a non-expired session obtained from a call + to CreateReadSession. Only the name field needs to be set. + requested_streams: + Required. Number of new streams requested. Must be positive. + Number of added streams may be less than this, see + CreateReadSessionRequest for more information. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest) + )) +_sym_db.RegisterMessage(BatchCreateReadSessionStreamsRequest) + +BatchCreateReadSessionStreamsResponse = _reflection.GeneratedProtocolMessageType('BatchCreateReadSessionStreamsResponse', (_message.Message,), dict( + DESCRIPTOR = _BATCHCREATEREADSESSIONSTREAMSRESPONSE, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """The response from ``BatchCreateReadSessionStreams`` returns the stream + identifiers for the newly created streams. + + + Attributes: + streams: + Newly added streams. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse) + )) +_sym_db.RegisterMessage(BatchCreateReadSessionStreamsResponse) + +FinalizeStreamRequest = _reflection.GeneratedProtocolMessageType('FinalizeStreamRequest', (_message.Message,), dict( + DESCRIPTOR = _FINALIZESTREAMREQUEST, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Request information for invoking ``FinalizeStream``. + + + Attributes: + stream: + Stream to finalize. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest) + )) +_sym_db.RegisterMessage(FinalizeStreamRequest) + +SplitReadStreamRequest = _reflection.GeneratedProtocolMessageType('SplitReadStreamRequest', (_message.Message,), dict( + DESCRIPTOR = _SPLITREADSTREAMREQUEST, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Request information for ``SplitReadStream``. + + + Attributes: + original_stream: + Stream to split. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest) + )) +_sym_db.RegisterMessage(SplitReadStreamRequest) + +SplitReadStreamResponse = _reflection.GeneratedProtocolMessageType('SplitReadStreamResponse', (_message.Message,), dict( + DESCRIPTOR = _SPLITREADSTREAMRESPONSE, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.storage_pb2' + , + __doc__ = """Response from ``SplitReadStream``. + + + Attributes: + primary_stream: + Primary stream. Will contain the beginning portion of + \|original\_stream\|. + remainder_stream: + Remainder stream. Will contain the tail of + \|original\_stream\|. 
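
The BatchCreateReadSessionStreams and FinalizeStream messages documented above support resizing an existing session. A minimal sketch of that flow follows (not part of the patch); it assumes a client and a non-expired session obtained as in the previous example, and the helper name is illustrative only.

def resize_session(client, session):
    """Add streams to an existing read session, then retire one stream."""
    # Ask for two more streams; the service may add fewer than requested.
    response = client.batch_create_read_session_streams(
        session, requested_streams=2)
    new_streams = list(response.streams)

    # Stop assigning new data to the first stream. Rows already assigned to
    # it must still be read to the end of that stream.
    client.finalize_stream(session.streams[0])
    return new_streams
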
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse) + )) +_sym_db.RegisterMessage(SplitReadStreamResponse) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage')) + +_BIGQUERYSTORAGE = _descriptor.ServiceDescriptor( + name='BigQueryStorage', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage', + file=DESCRIPTOR, + index=0, + options=None, + serialized_start=2342, + serialized_end=3075, + methods=[ + _descriptor.MethodDescriptor( + name='CreateReadSession', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage.CreateReadSession', + index=0, + containing_service=None, + input_type=_CREATEREADSESSIONREQUEST, + output_type=_READSESSION, + options=None, + ), + _descriptor.MethodDescriptor( + name='ReadRows', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage.ReadRows', + index=1, + containing_service=None, + input_type=_READROWSREQUEST, + output_type=_READROWSRESPONSE, + options=None, + ), + _descriptor.MethodDescriptor( + name='BatchCreateReadSessionStreams', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage.BatchCreateReadSessionStreams', + index=2, + containing_service=None, + input_type=_BATCHCREATEREADSESSIONSTREAMSREQUEST, + output_type=_BATCHCREATEREADSESSIONSTREAMSRESPONSE, + options=None, + ), + _descriptor.MethodDescriptor( + name='FinalizeStream', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage.FinalizeStream', + index=3, + containing_service=None, + input_type=_FINALIZESTREAMREQUEST, + output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, + options=None, + ), + _descriptor.MethodDescriptor( + name='SplitReadStream', + full_name='google.cloud.bigquery.storage.v1beta1.BigQueryStorage.SplitReadStream', + index=4, + containing_service=None, + input_type=_SPLITREADSTREAMREQUEST, + output_type=_SPLITREADSTREAMRESPONSE, + options=None, + ), +]) +_sym_db.RegisterServiceDescriptor(_BIGQUERYSTORAGE) + +DESCRIPTOR.services_by_name['BigQueryStorage'] = _BIGQUERYSTORAGE + +# @@protoc_insertion_point(module_scope) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py new file mode 100644 index 000000000000..c619db1a5dc3 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py @@ -0,0 +1,165 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc + +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2 as google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2 +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 + + +class BigQueryStorageStub(object): + """BigQuery storage API. + + The BigQuery storage API can be used to read data stored in BigQuery. + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. 
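
The generated stub above is constructed from a grpc.Channel. Most callers will go through the GAPIC transport and the hand-written client instead, but a sketch of wiring the stub up directly is shown below; the endpoint, scopes, and table values are assumptions for illustration.

import google.auth
from google.api_core import grpc_helpers

from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2
from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2_grpc

credentials, _ = google.auth.default(
    scopes=['https://www.googleapis.com/auth/cloud-platform'])
channel = grpc_helpers.create_channel(
    'bigquerystorage.googleapis.com:443', credentials=credentials)
stub = storage_pb2_grpc.BigQueryStorageStub(channel)

# Build a CreateReadSessionRequest by hand and call the RPC directly.
request = storage_pb2.CreateReadSessionRequest(
    parent='projects/my-billing-project')
request.table_reference.project_id = 'bigquery-public-data'
request.table_reference.dataset_id = 'usa_names'
request.table_reference.table_id = 'usa_1910_2013'
session = stub.CreateReadSession(request)
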
+ """ + self.CreateReadSession = channel.unary_unary( + '/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/CreateReadSession', + request_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.CreateReadSessionRequest.SerializeToString, + response_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadSession.FromString, + ) + self.ReadRows = channel.unary_stream( + '/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/ReadRows', + request_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsRequest.SerializeToString, + response_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsResponse.FromString, + ) + self.BatchCreateReadSessionStreams = channel.unary_unary( + '/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/BatchCreateReadSessionStreams', + request_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsRequest.SerializeToString, + response_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsResponse.FromString, + ) + self.FinalizeStream = channel.unary_unary( + '/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/FinalizeStream', + request_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.FinalizeStreamRequest.SerializeToString, + response_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + ) + self.SplitReadStream = channel.unary_unary( + '/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/SplitReadStream', + request_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamRequest.SerializeToString, + response_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamResponse.FromString, + ) + + +class BigQueryStorageServicer(object): + """BigQuery storage API. + + The BigQuery storage API can be used to read data stored in BigQuery. + """ + + def CreateReadSession(self, request, context): + """Creates a new read session. A read session divides the contents of a + BigQuery table into one or more streams, which can then be used to read + data from the table. The read session also specifies properties of the + data to be read, such as a list of columns or a push-down filter describing + the rows to be returned. + + A particular row can be read by at most one stream. When the caller has + reached the end of each stream in the session, then all the data in the + table has been read. + + Read sessions automatically expire 24 hours after they are created and do + not require manual clean-up by the caller. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ReadRows(self, request, context): + """Reads rows from the table in the format prescribed by the read session. + Each response contains one or more table rows, up to a maximum of 10 MiB + per response; read requests which attempt to read individual rows larger + than this will fail. + + Each request also returns a set of stream statistics reflecting the + estimated total number of rows in the read stream. 
This number is computed + based on the total table size and the number of active streams in the read + session, and may change as other streams continue to read data. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def BatchCreateReadSessionStreams(self, request, context): + """Creates additional streams for a ReadSession. This API can be used to + dynamically adjust the parallelism of a batch processing task upwards by + adding additional workers. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def FinalizeStream(self, request, context): + """Triggers the graceful termination of a single stream in a ReadSession. This + API can be used to dynamically adjust the parallelism of a batch processing + task downwards without losing data. + + This API does not delete the stream -- it remains visible in the + ReadSession, and any data processed by the stream is not released to other + streams. However, no additional data will be assigned to the stream once + this call completes. Callers must continue reading data on the stream until + the end of the stream is reached so that data which has already been + assigned to the stream will be processed. + + This method will return an error if there are no other live streams + in the Session, or if SplitReadStream() has been called on the given + Stream. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def SplitReadStream(self, request, context): + """Splits a given read stream into two Streams. These streams are referred to + as the primary and the residual of the split. The original stream can still + be read from in the same manner as before. Both of the returned streams can + also be read from, and the total rows return by both child streams will be + the same as the rows read from the original stream. + + Moreover, the two child streams will be allocated back to back in the + original Stream. Concretely, it is guaranteed that for streams Original, + Primary, and Residual, that Original[0-j] = Primary[0-j] and + Original[j-n] = Residual[0-m] once the streams have been read to + completion. + + This method is guaranteed to be idempotent. 
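
A short sketch (not from the patch) of the SplitReadStream contract described above: after the split, the primary and remainder streams together cover exactly the rows of the original stream. The helper names are illustrative, and a static (batch) table is assumed so the counts are stable.

from google.cloud import bigquery_storage_v1beta1

def count_rows(client, stream):
    # Sum the row counts of the Avro blocks returned for this stream.
    position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream)
    return sum(
        response.avro_rows.row_count
        for response in client.read_rows(position))

def split_and_check(client, original_stream):
    split = client.split_read_stream(original_stream)
    # Original[0-j] == Primary[0-j] and Original[j-n] == Residual[0-m], so the
    # children's row counts sum to the original stream's row count.
    children_total = (count_rows(client, split.primary_stream)
                      + count_rows(client, split.remainder_stream))
    return children_total == count_rows(client, original_stream)
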
+ """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_BigQueryStorageServicer_to_server(servicer, server): + rpc_method_handlers = { + 'CreateReadSession': grpc.unary_unary_rpc_method_handler( + servicer.CreateReadSession, + request_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.CreateReadSessionRequest.FromString, + response_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadSession.SerializeToString, + ), + 'ReadRows': grpc.unary_stream_rpc_method_handler( + servicer.ReadRows, + request_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsRequest.FromString, + response_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsResponse.SerializeToString, + ), + 'BatchCreateReadSessionStreams': grpc.unary_unary_rpc_method_handler( + servicer.BatchCreateReadSessionStreams, + request_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsRequest.FromString, + response_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsResponse.SerializeToString, + ), + 'FinalizeStream': grpc.unary_unary_rpc_method_handler( + servicer.FinalizeStream, + request_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.FinalizeStreamRequest.FromString, + response_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + ), + 'SplitReadStream': grpc.unary_unary_rpc_method_handler( + servicer.SplitReadStream, + request_deserializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamRequest.FromString, + response_serializer=google_dot_cloud_dot_bigquery_dot_storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'google.cloud.bigquery.storage.v1beta1.BigQueryStorage', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2.py new file mode 100644 index 000000000000..24e71dcaff54 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2.py @@ -0,0 +1,149 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: google/cloud/bigquery/storage_v1beta1/proto/table_reference.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='google/cloud/bigquery/storage_v1beta1/proto/table_reference.proto', + package='google.cloud.bigquery.storage.v1beta1', + syntax='proto3', + serialized_pb=_b('\nAgoogle/cloud/bigquery/storage_v1beta1/proto/table_reference.proto\x12%google.cloud.bigquery.storage.v1beta1\x1a\x1fgoogle/protobuf/timestamp.proto\"J\n\x0eTableReference\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08table_id\x18\x03 \x01(\t\"C\n\x0eTableModifiers\x12\x31\n\rsnapshot_time\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.TimestampB\x8e\x01\n)com.google.cloud.bigquery.storage.v1beta1B\x13TableReferenceProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3') + , + dependencies=[google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR,]) + + + + +_TABLEREFERENCE = _descriptor.Descriptor( + name='TableReference', + full_name='google.cloud.bigquery.storage.v1beta1.TableReference', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='project_id', full_name='google.cloud.bigquery.storage.v1beta1.TableReference.project_id', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='dataset_id', full_name='google.cloud.bigquery.storage.v1beta1.TableReference.dataset_id', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='table_id', full_name='google.cloud.bigquery.storage.v1beta1.TableReference.table_id', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=141, + serialized_end=215, +) + + +_TABLEMODIFIERS = _descriptor.Descriptor( + name='TableModifiers', + full_name='google.cloud.bigquery.storage.v1beta1.TableModifiers', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='snapshot_time', full_name='google.cloud.bigquery.storage.v1beta1.TableModifiers.snapshot_time', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + 
options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=217, + serialized_end=284, +) + +_TABLEMODIFIERS.fields_by_name['snapshot_time'].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP +DESCRIPTOR.message_types_by_name['TableReference'] = _TABLEREFERENCE +DESCRIPTOR.message_types_by_name['TableModifiers'] = _TABLEMODIFIERS +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +TableReference = _reflection.GeneratedProtocolMessageType('TableReference', (_message.Message,), dict( + DESCRIPTOR = _TABLEREFERENCE, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.table_reference_pb2' + , + __doc__ = """Table reference that includes just the 3 strings needed to identify a + table. + + + Attributes: + project_id: + The assigned project ID of the project. + dataset_id: + The ID of the dataset in the above project. + table_id: + The ID of the table in the above dataset. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.TableReference) + )) +_sym_db.RegisterMessage(TableReference) + +TableModifiers = _reflection.GeneratedProtocolMessageType('TableModifiers', (_message.Message,), dict( + DESCRIPTOR = _TABLEMODIFIERS, + __module__ = 'google.cloud.bigquery.storage_v1beta1.proto.table_reference_pb2' + , + __doc__ = """All fields in this message optional. + + + Attributes: + snapshot_time: + The snapshot time of the table. If not set, interpreted as + now. + """, + # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.TableModifiers) + )) +_sym_db.RegisterMessage(TableModifiers) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n)com.google.cloud.bigquery.storage.v1beta1B\023TableReferenceProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage')) +# @@protoc_insertion_point(module_scope) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2_grpc.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2_grpc.py new file mode 100644 index 000000000000..a89435267cb2 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2_grpc.py @@ -0,0 +1,3 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc + diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/reader.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/reader.py new file mode 100644 index 000000000000..da8909f5bfe2 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/reader.py @@ -0,0 +1,265 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
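
Before the hand-written reader module begins, here is a small sketch (not part of the patch) using the TableReference and TableModifiers messages documented in table_reference_pb2.py above to request a point-in-time read. The timestamp and table are placeholders, and passing table_modifiers through create_read_session is assumed from the request message's fields.

import datetime

from google.cloud.bigquery_storage_v1beta1 import types

table_ref = types.TableReference(
    project_id='bigquery-public-data',
    dataset_id='usa_names',
    table_id='usa_1910_2013',
)

modifiers = types.TableModifiers()
# If snapshot_time is left unset, the read is interpreted as "now".
modifiers.snapshot_time.FromDatetime(datetime.datetime(2018, 11, 1))

# These would then be passed to create_read_session, e.g.:
# session = client.create_read_session(
#     table_ref, 'projects/my-billing-project', table_modifiers=modifiers)
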
+ +from __future__ import absolute_import + +import itertools +import json + +try: + import fastavro +except ImportError: # pragma: NO COVER + fastavro = None +import google.api_core.exceptions +try: + import pandas +except ImportError: # pragma: NO COVER + pandas = None +import six + +from google.cloud.bigquery_storage_v1beta1 import types + + +_STREAM_RESUMPTION_EXCEPTIONS = ( + google.api_core.exceptions.DeadlineExceeded, + google.api_core.exceptions.ServiceUnavailable, +) +_FASTAVRO_REQUIRED = "fastavro is required to parse Avro blocks" + + +class ReadRowsStream(object): + """A stream of results from a read rows request. + + This stream is an iterable of + :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse`. + Iterate over it to fetch all row blocks. + + If the fastavro library is installed, use the + :func:`~google.cloud.bigquery_storage_v1beta1.reader.ReadRowsStream.rows()` + method to parse all blocks into a stream of row dictionaries. + + If the pandas and fastavro libraries are installed, use the + :func:`~google.cloud.bigquery_storage_v1beta1.reader.ReadRowsStream.to_dataframe()` + method to parse all blocks into a :class:`pandas.DataFrame`. + """ + + def __init__( + self, + wrapped, + client, + read_position, + read_rows_kwargs, + ): + """Construct a ReadRowsStream. + + Args: + wrapped (Iterable[ \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse \ + ]): + The ReadRows stream to read. + client ( \ + ~google.cloud.bigquery_storage_v1beta1.gapic. \ + big_query_storage_client.BigQueryStorageClient \ + ): + A GAPIC client used to reconnect to a ReadRows stream. This + must be the GAPIC client to avoid a circular dependency on + this class. + read_position (Union[ \ + dict, \ + ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition \ + ]): + Required. Identifier of the position in the stream to start + reading from. The offset requested must be less than the last + row read from ReadRows. Requesting a larger offset is + undefined. If a dict is provided, it must be of the same form + as the protobuf message + :class:`~google.cloud.bigquery_storage_v1beta1.types.StreamPosition` + read_rows_kwargs (dict): + Keyword arguments to use when reconnecting to a ReadRows + stream. + + Returns: + Iterable[ \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse \ + ]: + A sequence of row blocks. + """ + + # Make a copy of the read position so that we can update it without + # mutating the original input. + self._position = _copy_stream_position(read_position) + self._client = client + self._wrapped = wrapped + self._read_rows_kwargs = read_rows_kwargs + + def __iter__(self): + """An iterable of blocks. + + Returns: + Iterable[ \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse \ + ]: + A sequence of row blocks. + """ + + # Infinite loop to reconnect on reconnectable errors while processing + # the row stream. + while True: + try: + for block in self._wrapped: + rowcount = block.avro_rows.row_count + self._position.offset += rowcount + yield block + + return # Made it through the whole stream. + except _STREAM_RESUMPTION_EXCEPTIONS: + # Transient error, so reconnect to the stream. + pass + + self._reconnect() + + def _reconnect(self): + """Reconnect to the ReadRows stream using the most recent offset.""" + self._wrapped = self._client.read_rows( + _copy_stream_position(self._position), **self._read_rows_kwargs) + + def rows(self, read_session): + """Iterate over all rows in the stream. 
+ + This method requires the fastavro library in order to parse row + blocks. + + .. warning:: + DATETIME columns are not supported. They are currently parsed as + strings in the fastavro library. + + Args: + read_session ( \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadSession \ + ): + The read session associated with this read rows stream. This + contains the schema, which is required to parse the data + blocks. + + Returns: + Iterable[Mapping]: + A sequence of rows, represented as dictionaries. + """ + if fastavro is None: + raise ImportError(_FASTAVRO_REQUIRED) + + avro_schema = _avro_schema(read_session) + blocks = (_avro_rows(block, avro_schema) for block in self) + return itertools.chain.from_iterable(blocks) + + def to_dataframe(self, read_session): + """Create a :class:`pandas.DataFrame` of all rows in the stream. + + This method requires the pandas libary to create a data frame and the + fastavro library to parse row blocks. + + .. warning:: + DATETIME columns are not supported. They are currently parsed as + strings in the fastavro library. + + Args: + read_session ( \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadSession \ + ): + The read session associated with this read rows stream. This + contains the schema, which is required to parse the data + blocks. + + Returns: + pandas.DataFrame: + A data frame of all rows in the stream. + """ + if fastavro is None: + raise ImportError(_FASTAVRO_REQUIRED) + if pandas is None: + raise ImportError("pandas is required to create a DataFrame") + + avro_schema = _avro_schema(read_session) + frames = [] + for block in self: + dataframe = pandas.DataFrame(list(_avro_rows(block, avro_schema))) + frames.append(dataframe) + return pandas.concat(frames) + + +def _avro_schema(read_session): + """Extract and parse Avro schema from a read session. + + Args: + read_session ( \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadSession \ + ): + The read session associated with this read rows stream. This + contains the schema, which is required to parse the data + blocks. + + Returns: + A parsed Avro schema, using :func:`fastavro.schema.parse_schema`. + """ + json_schema = json.loads(read_session.avro_schema.schema) + return fastavro.parse_schema(json_schema) + + +def _avro_rows(block, avro_schema): + """Parse all rows in a stream block. + + Args: + read_session ( \ + ~google.cloud.bigquery_storage_v1beta1.types.ReadSession \ + ): + The read session associated with this read rows stream. This + contains the schema, which is required to parse the data + blocks. + + Returns: + Iterable[Mapping]: + A sequence of rows, represented as dictionaries. + """ + blockio = six.BytesIO(block.avro_rows.serialized_binary_rows) + while True: + # Loop in a while loop because schemaless_reader can only read + # a single record. + try: + # TODO: Parse DATETIME into datetime.datetime (no timezone), + # instead of as a string. + yield fastavro.schemaless_reader(blockio, avro_schema) + except StopIteration: + break # Finished with block + + +def _copy_stream_position(position): + """Copy a StreamPosition. + + Args: + position (Union[ \ + dict, \ + ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition \ + ]): + StreamPostion (or dictionary in StreamPosition format) to copy. + + Returns: + ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition: + A copy of the input StreamPostion. 
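
The rows() and to_dataframe() methods above are the main consumption paths for the hand-written reader. The sketch below (not part of the patch) shows both, assuming the fastavro and pandas extras are installed and reusing a client and session created as in the earlier examples; the 'name' column is an assumption about the table being read.

from google.cloud import bigquery_storage_v1beta1

def consume_stream(client, session):
    # The session must be the one the stream belongs to, because it carries
    # the Avro schema needed to decode each block.
    position = bigquery_storage_v1beta1.types.StreamPosition(
        stream=session.streams[0])

    # Row-by-row iteration (requires fastavro); each row is a dict keyed by
    # column name.
    names = [row['name'] for row in client.read_rows(position).rows(session)]

    # Whole-stream DataFrame (requires pandas and fastavro). A fresh
    # read_rows call re-reads the stream from offset 0.
    frame = client.read_rows(
        bigquery_storage_v1beta1.types.StreamPosition(
            stream=session.streams[0])).to_dataframe(session)
    return names, frame
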
+ """ + if isinstance(position, types.StreamPosition): + output = types.StreamPosition() + output.CopyFrom(position) + return output + + return types.StreamPosition(**position) diff --git a/bigquery_storage/google/cloud/bigquery_storage_v1beta1/types.py b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/types.py new file mode 100644 index 000000000000..9b0d557fe728 --- /dev/null +++ b/bigquery_storage/google/cloud/bigquery_storage_v1beta1/types.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import sys + +from google.api_core.protobuf_helpers import get_messages + +from google.cloud.bigquery_storage_v1beta1.proto import avro_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import read_options_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import table_reference_pb2 +from google.protobuf import empty_pb2 +from google.protobuf import timestamp_pb2 + +_shared_modules = [ + empty_pb2, + timestamp_pb2, +] + +_local_modules = [ + avro_pb2, + read_options_pb2, + storage_pb2, + table_reference_pb2, +] + +names = [] + +for module in _shared_modules: + for name, message in get_messages(module).items(): + setattr(sys.modules[__name__], name, message) + names.append(name) +for module in _local_modules: + for name, message in get_messages(module).items(): + message.__module__ = 'google.cloud.bigquery_storage_v1beta1.types' + setattr(sys.modules[__name__], name, message) + names.append(name) + +__all__ = tuple(sorted(names)) diff --git a/bigquery_storage/noxfile.py b/bigquery_storage/noxfile.py new file mode 100644 index 000000000000..d363f1ad5c30 --- /dev/null +++ b/bigquery_storage/noxfile.py @@ -0,0 +1,133 @@ +# Copyright 2018, Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import os +import shutil + +import nox + + +LOCAL_DEPS = ( + os.path.join('..', 'api_core'), +) + + +def default(session): + """Default unit test session. + + This is intended to be run **without** an interpreter set, so + that the current ``python`` (on the ``PATH``) or the version of + Python corresponding to the ``nox`` binary on the ``PATH`` can + run the tests. + """ + # Install all test dependencies, then install this package in-place. 
+ session.install('mock', 'pytest', 'pytest-cov') + for local_dep in LOCAL_DEPS: + session.install('-e', local_dep) + session.install('-e', '.[pandas,fastavro]') + + # Run py.test against the unit tests. + session.run( + 'py.test', + '--quiet', + '--cov=google.cloud.bigquery_storage', + '--cov=google.cloud.bigquery_storage_v1beta1', + '--cov=tests.unit', + '--cov-append', + '--cov-config=.coveragerc', + '--cov-report=', + os.path.join('tests', 'unit'), + *session.posargs + ) + + +@nox.session(python=['2.7', '3.5', '3.6', '3.7']) +def unit(session): + """Run the unit test suite.""" + default(session) + + +@nox.session(python='3.6') +def lint(session): + """Run linters. + Returns a failure if the linters find linting errors or sufficiently + serious code quality issues. + """ + + session.install('flake8', *LOCAL_DEPS) + session.install('-e', '.') + session.run( + 'flake8', os.path.join('google', 'cloud', 'bigquery_storage_v1beta1')) + session.run('flake8', 'tests') + + +@nox.session(python='3.6') +def lint_setup_py(session): + """Verify that setup.py is valid (including RST check).""" + session.install('docutils', 'pygments') + session.run('python', 'setup.py', 'check', '--restructuredtext', + '--strict') + + +@nox.session(python='3.6') +def cover(session): + """Run the final coverage report. + This outputs the coverage report aggregating coverage from the unit + test runs (not system test runs), and then erases coverage data. + """ + session.install('coverage', 'pytest-cov') + session.run('coverage', 'report', '--show-missing', '--fail-under=100') + session.run('coverage', 'erase') + + +@nox.session(python=['2.7', '3.6']) +def system(session): + """Run the system test suite.""" + + # Sanity check: Only run system tests if the environment variable is set. + if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''): + session.skip('Credentials must be set via environment variable.') + + # Install all test dependencies, then install this package into the + # virtualenv's dist-packages. + session.install('pytest') + session.install('-e', os.path.join('..', 'test_utils')) + for local_dep in LOCAL_DEPS: + session.install('-e', local_dep) + session.install('-e', '.[pandas,fastavro]') + + # Run py.test against the system tests. 
+ session.run('py.test', '--quiet', 'tests/system/') + + +@nox.session(python='3.6') +def docs(session): + """Build the docs.""" + + session.install('sphinx', 'sphinx_rtd_theme') + session.install('-e', '.[pandas,fastavro]') + + shutil.rmtree(os.path.join('docs', '_build'), ignore_errors=True) + session.run( + 'sphinx-build', + '-W', # warnings as errors + '-T', # show full traceback on exception + '-N', # no colors + '-b', 'html', + '-d', os.path.join('docs', '_build', 'doctrees', ''), + os.path.join('docs', ''), + os.path.join('docs', '_build', 'html', ''), + ) diff --git a/bigquery_storage/setup.cfg b/bigquery_storage/setup.cfg new file mode 100644 index 000000000000..17f660661b30 --- /dev/null +++ b/bigquery_storage/setup.cfg @@ -0,0 +1,10 @@ +[bdist_wheel] +universal = 1 + +[flake8] +exclude = + *_pb2.py + *_pb2_grpc.py + google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py + google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py + tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py diff --git a/bigquery_storage/setup.py b/bigquery_storage/setup.py new file mode 100644 index 000000000000..f8019f14a579 --- /dev/null +++ b/bigquery_storage/setup.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import io
+import os
+
+import setuptools
+
+name = 'google-cloud-bigquery-storage'
+description = 'BigQuery Storage API client library'
+version = '0.1.0'
+release_status = 'Development Status :: 3 - Alpha'
+dependencies = [
+    'google-api-core[grpc] >= 1.5.1, < 2.0.0dev',
+    'enum34; python_version < "3.4"',
+]
+extras = {
+    'pandas': 'pandas>=0.17.1',
+    'fastavro': 'fastavro>=0.21.2',
+}
+
+package_root = os.path.abspath(os.path.dirname(__file__))
+
+readme_filename = os.path.join(package_root, 'README.rst')
+with io.open(readme_filename, encoding='utf-8') as readme_file:
+    readme = readme_file.read()
+
+packages = [
+    package for package in setuptools.find_packages()
+    if package.startswith('google')
+]
+
+namespaces = ['google']
+if 'google.cloud' in packages:
+    namespaces.append('google.cloud')
+
+setuptools.setup(
+    name=name,
+    version=version,
+    description=description,
+    long_description=readme,
+    author='Google LLC',
+    author_email='googleapis-packages@google.com',
+    license='Apache 2.0',
+    url='https://github.com/GoogleCloudPlatform/google-cloud-python',
+    classifiers=[
+        release_status,
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: Apache Software License',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Operating System :: OS Independent',
+        'Topic :: Internet',
+    ],
+    platforms='Posix; MacOS X; Windows',
+    packages=packages,
+    namespace_packages=namespaces,
+    install_requires=dependencies,
+    extras_require=extras,
+    include_package_data=True,
+    zip_safe=False,
+)
diff --git a/bigquery_storage/synth.py b/bigquery_storage/synth.py
new file mode 100644
index 000000000000..59f2067b9fdd
--- /dev/null
+++ b/bigquery_storage/synth.py
@@ -0,0 +1,113 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +"""This script is used to synthesize generated parts of this library.""" + +import synthtool as s +from synthtool import gcp + +gapic = gcp.GAPICGenerator() + +version = 'v1' + +library = gapic.py_library( + 'bigquery-datatransfer', + version, + config_path='/google/cloud/bigquery/storage/' + 'artman_bigquerystorage_v1beta1.yaml', + artman_output_name='bigquerystorage-v1beta1' +) + +s.move( + library, + excludes=[ + 'docs/conf.py', + 'docs/index.rst', + 'google/cloud/bigquery_storage_v1beta1/__init__.py', + 'README.rst', + 'nox*.py', + 'setup.py', + 'setup.cfg', + ], +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py', + 'google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py'], + 'from google.cloud.bigquery.storage_v1beta1.proto', + 'from google.cloud.bigquery_storage_v1beta1.proto', +) + +s.replace( + 'google/cloud/bigquery_storage_v1beta1/gapic/' + 'big_query_storage_client.py', + 'google-cloud-bigquerystorage', + 'google-cloud-bigquery-storage') + +s.replace( + 'google/cloud/bigquery_storage_v1beta1/gapic/' + 'big_query_storage_client.py', + 'import google.api_core.gapic_v1.method\n', + '\g<0>import google.api_core.path_template\n' +) + +s.replace( + ['tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py'], + 'from google.cloud import bigquery_storage_v1beta1', + 'from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client # noqa', +) + +s.replace( + ['tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py'], + 'bigquery_storage_v1beta1.BigQueryStorageClient', + 'big_query_storage_client.BigQueryStorageClient', +) + +# START: Ignore lint and coverage +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py'], + 'if transport:', + 'if transport: # pragma: no cover', +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py'], + r'to_grpc_metadata\(\n', + 'to_grpc_metadata( # pragma: no cover\n', +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py'], + r'metadata.append\(routing_metadata\)', + 'metadata.append(routing_metadata) # pragma: no cover', +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py'], + 'if channel is not None and credentials is not None:', + 'if channel is not None and credentials is not None: # pragma: no cover', +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py'], + 'if channel is None:', + 'if channel is None: # pragma: no cover', +) + +s.replace( + ['google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py'], + r'google.api_core.grpc_helpers.create_channel\(', + 'google.api_core.grpc_helpers.create_channel( # pragma: no cover', +) +# END: Ignore lint and coverage diff --git a/bigquery_storage/tests/system/test_system.py b/bigquery_storage/tests/system/test_system.py new file mode 100644 index 000000000000..03e1064e066a --- /dev/null +++ b/bigquery_storage/tests/system/test_system.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""System tests for reading rows from tables.""" + +import os + +import pytest + +from google.cloud import bigquery_storage_v1beta1 + + +@pytest.fixture() +def project_id(): + return os.environ['PROJECT_ID'] + + +@pytest.fixture() +def client(): + return bigquery_storage_v1beta1.BigQueryStorageClient() + + +@pytest.fixture() +def table_reference(): + table_ref = bigquery_storage_v1beta1.types.TableReference() + table_ref.project_id = 'bigquery-public-data' + table_ref.dataset_id = 'usa_names' + table_ref.table_id = 'usa_1910_2013' + return table_ref + + +@pytest.fixture() +def small_table_reference(): + table_ref = bigquery_storage_v1beta1.types.TableReference() + table_ref.project_id = 'bigquery-public-data' + table_ref.dataset_id = 'utility_us' + table_ref.table_id = 'country_code_iso' + return table_ref + + +def test_read_rows_full_table(client, project_id, small_table_reference): + session = client.create_read_session( + small_table_reference, + 'projects/{}'.format(project_id), + requested_streams=1, + ) + + stream_pos = bigquery_storage_v1beta1.types.StreamPosition( + stream=session.streams[0]) + blocks = list(client.read_rows(stream_pos)) + + assert len(blocks) > 0 + block = blocks[0] + assert block.status.estimated_row_count > 0 + assert len(block.avro_rows.serialized_binary_rows) > 0 + + +def test_read_rows_to_dataframe(client, project_id): + table_ref = bigquery_storage_v1beta1.types.TableReference() + table_ref.project_id = 'bigquery-public-data' + table_ref.dataset_id = 'new_york_citibike' + table_ref.table_id = 'citibike_stations' + session = client.create_read_session( + table_ref, + 'projects/{}'.format(project_id), + requested_streams=1, + ) + stream_pos = bigquery_storage_v1beta1.types.StreamPosition( + stream=session.streams[0]) + + frame = client.read_rows(stream_pos).to_dataframe(session) + + # Station ID is a required field (no nulls), so the datatype should always + # be integer. + assert frame.station_id.dtype.name == 'int64' + assert frame['name'].str.startswith('Central Park').any() + + +def test_split_read_stream(client, project_id, table_reference): + session = client.create_read_session( + table_reference, + parent='projects/{}'.format(project_id), + ) + + split = client.split_read_stream(session.streams[0]) + + assert split.primary_stream is not None + assert split.remainder_stream is not None + assert split.primary_stream != split.remainder_stream diff --git a/bigquery_storage/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py b/bigquery_storage/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py new file mode 100644 index 000000000000..699517f480c0 --- /dev/null +++ b/bigquery_storage/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py @@ -0,0 +1,244 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests.""" + +import pytest + +from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client # noqa +from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2 +from google.cloud.bigquery_storage_v1beta1.proto import table_reference_pb2 +from google.protobuf import empty_pb2 + + +class MultiCallableStub(object): + """Stub for the grpc.UnaryUnaryMultiCallable interface.""" + + def __init__(self, method, channel_stub): + self.method = method + self.channel_stub = channel_stub + + def __call__(self, request, timeout=None, metadata=None, credentials=None): + self.channel_stub.requests.append((self.method, request)) + + response = None + if self.channel_stub.responses: + response = self.channel_stub.responses.pop() + + if isinstance(response, Exception): + raise response + + if response: + return response + + +class ChannelStub(object): + """Stub for the grpc.Channel interface.""" + + def __init__(self, responses=[]): + self.responses = responses + self.requests = [] + + def unary_unary(self, + method, + request_serializer=None, + response_deserializer=None): + return MultiCallableStub(method, self) + + def unary_stream(self, + method, + request_serializer=None, + response_deserializer=None): + return MultiCallableStub(method, self) + + +class CustomException(Exception): + pass + + +class TestBigQueryStorageClient(object): + def test_create_read_session(self): + # Setup Expected Response + name = 'name3373707' + expected_response = {'name': name} + expected_response = storage_pb2.ReadSession(**expected_response) + + # Mock the API response + channel = ChannelStub(responses=[expected_response]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup Request + table_reference = {} + parent = 'parent-995424086' + + response = client.create_read_session(table_reference, parent) + assert expected_response == response + + assert len(channel.requests) == 1 + expected_request = storage_pb2.CreateReadSessionRequest( + table_reference=table_reference, parent=parent) + actual_request = channel.requests[0][1] + assert expected_request == actual_request + + def test_create_read_session_exception(self): + # Mock the API response + channel = ChannelStub(responses=[CustomException()]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup request + table_reference = {} + parent = 'parent-995424086' + + with pytest.raises(CustomException): + client.create_read_session(table_reference, parent) + + def test_read_rows(self): + # Setup Expected Response + expected_response = {} + expected_response = storage_pb2.ReadRowsResponse(**expected_response) + + # Mock the API response + channel = ChannelStub(responses=[iter([expected_response])]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup Request + read_position = {} + + response = client.read_rows(read_position) + resources = list(response) + assert len(resources) == 1 + assert expected_response == resources[0] + + assert len(channel.requests) == 1 + expected_request = storage_pb2.ReadRowsRequest( + 
read_position=read_position) + actual_request = channel.requests[0][1] + assert expected_request == actual_request + + def test_read_rows_exception(self): + # Mock the API response + channel = ChannelStub(responses=[CustomException()]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup request + read_position = {} + + with pytest.raises(CustomException): + client.read_rows(read_position) + + def test_batch_create_read_session_streams(self): + # Setup Expected Response + expected_response = {} + expected_response = storage_pb2.BatchCreateReadSessionStreamsResponse( + **expected_response) + + # Mock the API response + channel = ChannelStub(responses=[expected_response]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup Request + session = {} + requested_streams = 1017221410 + + response = client.batch_create_read_session_streams( + session, requested_streams) + assert expected_response == response + + assert len(channel.requests) == 1 + expected_request = storage_pb2.BatchCreateReadSessionStreamsRequest( + session=session, requested_streams=requested_streams) + actual_request = channel.requests[0][1] + assert expected_request == actual_request + + def test_batch_create_read_session_streams_exception(self): + # Mock the API response + channel = ChannelStub(responses=[CustomException()]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup request + session = {} + requested_streams = 1017221410 + + with pytest.raises(CustomException): + client.batch_create_read_session_streams(session, + requested_streams) + + def test_finalize_stream(self): + channel = ChannelStub() + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup Request + stream = {} + + client.finalize_stream(stream) + + assert len(channel.requests) == 1 + expected_request = storage_pb2.FinalizeStreamRequest(stream=stream) + actual_request = channel.requests[0][1] + assert expected_request == actual_request + + def test_finalize_stream_exception(self): + # Mock the API response + channel = ChannelStub(responses=[CustomException()]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup request + stream = {} + + with pytest.raises(CustomException): + client.finalize_stream(stream) + + def test_split_read_stream(self): + # Setup Expected Response + expected_response = {} + expected_response = storage_pb2.SplitReadStreamResponse( + **expected_response) + + # Mock the API response + channel = ChannelStub(responses=[expected_response]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup Request + original_stream = {} + + response = client.split_read_stream(original_stream) + assert expected_response == response + + assert len(channel.requests) == 1 + expected_request = storage_pb2.SplitReadStreamRequest( + original_stream=original_stream) + actual_request = channel.requests[0][1] + assert expected_request == actual_request + + def test_split_read_stream_exception(self): + # Mock the API response + channel = ChannelStub(responses=[CustomException()]) + client = big_query_storage_client.BigQueryStorageClient( + channel=channel) + + # Setup request + original_stream = {} + + with pytest.raises(CustomException): + client.split_read_stream(original_stream) diff --git a/bigquery_storage/tests/unit/test_client.py b/bigquery_storage/tests/unit/test_client.py new file mode 100644 index 000000000000..e671b9a3a92a --- /dev/null 
+++ b/bigquery_storage/tests/unit/test_client.py
@@ -0,0 +1,97 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.api_core.gapic_v1 import client_info
+import mock
+import pytest
+
+from google.cloud.bigquery_storage_v1beta1 import types
+
+
+PROJECT = 'my-project'
+SERVICE_ACCOUNT_PROJECT = 'project-from-credentials'
+
+
+@pytest.fixture()
+def mock_transport(monkeypatch):
+    from google.cloud.bigquery_storage_v1beta1.gapic.transports import (
+        big_query_storage_grpc_transport
+    )
+    transport = mock.create_autospec(
+        big_query_storage_grpc_transport.BigQueryStorageGrpcTransport,
+    )
+    return transport
+
+
+@pytest.fixture()
+def client_under_test(mock_transport):
+    from google.cloud.bigquery_storage_v1beta1 import client
+
+    # The mock is detected as a callable. By creating a real callable here, the
+    # mock can still be used to verify RPCs.
+    def transport_callable(credentials=None, default_class=None):
+        return mock_transport
+
+    return client.BigQueryStorageClient(
+        transport=transport_callable,
+    )
+
+
+def test_constructor_w_client_info(mock_transport):
+    from google.cloud.bigquery_storage_v1beta1 import client
+
+    def transport_callable(credentials=None, default_class=None):
+        return mock_transport
+
+    client_under_test = client.BigQueryStorageClient(
+        transport=transport_callable,
+        client_info=client_info.ClientInfo(
+            client_library_version='test-client-version',
+        ),
+    )
+
+    user_agent = client_under_test._client_info.to_user_agent()
+    assert 'test-client-version' in user_agent
+
+
+def test_create_read_session(mock_transport, client_under_test):
+    table_reference = types.TableReference(
+        project_id='data-project-id',
+        dataset_id='dataset_id',
+        table_id='table_id',
+    )
+
+    client_under_test.create_read_session(
+        table_reference,
+        'projects/other-project',
+    )
+
+    expected_request = types.CreateReadSessionRequest(
+        table_reference=table_reference,
+        parent='projects/other-project',
+    )
+    mock_transport.create_read_session.assert_called_once_with(
+        expected_request, metadata=mock.ANY, timeout=mock.ANY)
+
+
+def test_read_rows(mock_transport, client_under_test):
+    stream_position = types.StreamPosition()
+
+    client_under_test.read_rows(stream_position)
+
+    expected_request = types.ReadRowsRequest(
+        read_position=stream_position,
+    )
+    mock_transport.read_rows.assert_called_once_with(
+        expected_request, metadata=mock.ANY, timeout=mock.ANY)
diff --git a/bigquery_storage/tests/unit/test_reader.py b/bigquery_storage/tests/unit/test_reader.py
new file mode 100644
index 000000000000..489e81fec176
--- /dev/null
+++ b/bigquery_storage/tests/unit/test_reader.py
@@ -0,0 +1,360 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import decimal
+import itertools
+import json
+
+import fastavro
+import mock
+import pandas
+import pandas.testing
+import pytest
+import pytz
+import six
+
+import google.api_core.exceptions
+from google.cloud import bigquery_storage_v1beta1
+
+
+PROJECT = "my-project"
+BQ_TO_AVRO_TYPES = {
+    "int64": "long",
+    "float64": "double",
+    "bool": "boolean",
+    "numeric": {
+        "type": "bytes",
+        "logicalType": "decimal",
+        "precision": 38,
+        "scale": 9,
+    },
+    "string": "string",
+    "bytes": "bytes",
+    "date": {"type": "int", "logicalType": "date"},
+    "datetime": {"type": "string", "sqlType": "DATETIME"},
+    "time": {"type": "long", "logicalType": "time-micros"},
+    "timestamp": {"type": "long", "logicalType": "timestamp-micros"},
+}
+SCALAR_COLUMNS = [
+    {"name": "int_col", "type": "int64"},
+    {"name": "float_col", "type": "float64"},
+    {"name": "num_col", "type": "numeric"},
+    {"name": "bool_col", "type": "bool"},
+    {"name": "str_col", "type": "string"},
+    {"name": "bytes_col", "type": "bytes"},
+    {"name": "date_col", "type": "date"},
+    {"name": "time_col", "type": "time"},
+    {"name": "ts_col", "type": "timestamp"},
+]
+SCALAR_BLOCKS = [
+    [
+        {
+            "int_col": 123,
+            "float_col": 3.14,
+            "num_col": decimal.Decimal("9.99"),
+            "bool_col": True,
+            "str_col": "hello world",
+            "bytes_col": b"ascii bytes",
+            "date_col": datetime.date(1998, 9, 4),
+            "time_col": datetime.time(12, 0),
+            "ts_col": datetime.datetime(2000, 1, 1, 5, 0, tzinfo=pytz.utc),
+        },
+        {
+            "int_col": 456,
+            "float_col": 2.72,
+            "num_col": decimal.Decimal("0.99"),
+            "bool_col": False,
+            "str_col": "hallo welt",
+            "bytes_col": b"\xbb\xee\xff",
+            "date_col": datetime.date(1995, 3, 2),
+            "time_col": datetime.time(13, 37),
+            "ts_col": datetime.datetime(1965, 4, 3, 2, 1, tzinfo=pytz.utc),
+        },
+    ],
+    [
+        {
+            "int_col": 789,
+            "float_col": 1.23,
+            "num_col": decimal.Decimal("5.67"),
+            "bool_col": True,
+            "str_col": u"こんにちは世界",
+            "bytes_col": b"\x54\x69\x6d",
+            "date_col": datetime.date(1970, 1, 1),
+            "time_col": datetime.time(16, 20),
+            "ts_col": datetime.datetime(
+                1991, 8, 25, 20, 57, 8, tzinfo=pytz.utc
+            ),
+        }
+    ],
+]
+
+
+@pytest.fixture()
+def mut():
+    from google.cloud.bigquery_storage_v1beta1 import reader
+
+    return reader
+
+
+@pytest.fixture()
+def class_under_test(mut):
+    return mut.ReadRowsStream
+
+
+@pytest.fixture()
+def mock_client():
+    from google.cloud.bigquery_storage_v1beta1.gapic import (
+        big_query_storage_client,
+    )
+
+    return mock.create_autospec(big_query_storage_client.BigQueryStorageClient)
+
+
+def _bq_to_avro_blocks(bq_blocks, avro_schema_json):
+    avro_schema = fastavro.parse_schema(avro_schema_json)
+    avro_blocks = []
+    for block in bq_blocks:
+        blockio = six.BytesIO()
+        for row in block:
+            fastavro.schemaless_writer(blockio, avro_schema, row)
+
+        response = bigquery_storage_v1beta1.types.ReadRowsResponse()
+        response.avro_rows.row_count = len(block)
+        response.avro_rows.serialized_binary_rows = blockio.getvalue()
+        avro_blocks.append(response)
+    return avro_blocks
+
+
+def _avro_blocks_w_deadline(avro_blocks):
+    for block in avro_blocks:
+        yield block
+    raise google.api_core.exceptions.DeadlineExceeded('test: please reconnect')
+
+
+def _generate_read_session(avro_schema_json):
+    schema = json.dumps(avro_schema_json)
+    return bigquery_storage_v1beta1.types.ReadSession(
+        avro_schema={"schema": schema},
+    )
+
+
+def _bq_to_avro_schema(bq_columns):
+    fields = []
+    avro_schema = {"type": "record", "name": "__root__", "fields": fields}
+
+    for column in bq_columns:
+        doc = column.get("description")
+        name = column["name"]
+        type_ = BQ_TO_AVRO_TYPES[column["type"]]
+        mode = column.get("mode", "nullable").lower()
+
+        if mode == "nullable":
+            type_ = ["null", type_]
+
+        fields.append(
+            {
+                "name": name,
+                "type": type_,
+                "doc": doc,
+            }
+        )
+
+    return avro_schema
+
+
+def _get_avro_bytes(rows, avro_schema):
+    avro_file = six.BytesIO()
+    for row in rows:
+        fastavro.schemaless_writer(avro_file, avro_schema, row)
+    return avro_file.getvalue()
+
+
+def test_rows_raises_import_error(
+        mut, class_under_test, mock_client, monkeypatch):
+    monkeypatch.setattr(mut, 'fastavro', None)
+    reader = class_under_test(
+        [],
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+    read_session = bigquery_storage_v1beta1.types.ReadSession()
+
+    with pytest.raises(ImportError):
+        reader.rows(read_session)
+
+
+def test_rows_w_empty_stream(class_under_test, mock_client):
+    bq_columns = [
+        {"name": "int_col", "type": "int64"},
+    ]
+    avro_schema = _bq_to_avro_schema(bq_columns)
+    read_session = _generate_read_session(avro_schema)
+    reader = class_under_test(
+        [],
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+
+    got = tuple(reader.rows(read_session))
+    assert got == ()
+
+
+def test_rows_w_scalars(class_under_test, mock_client):
+    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
+    read_session = _generate_read_session(avro_schema)
+    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
+
+    reader = class_under_test(
+        avro_blocks,
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+    got = tuple(reader.rows(read_session))
+
+    expected = tuple(itertools.chain.from_iterable(SCALAR_BLOCKS))
+    assert got == expected
+
+
+def test_rows_w_reconnect(class_under_test, mock_client):
+    bq_columns = [
+        {"name": "int_col", "type": "int64"},
+    ]
+    avro_schema = _bq_to_avro_schema(bq_columns)
+    read_session = _generate_read_session(avro_schema)
+    bq_blocks_1 = [
+        [{"int_col": 123}, {"int_col": 234}],
+        [{"int_col": 345}, {"int_col": 456}],
+    ]
+    avro_blocks_1 = _avro_blocks_w_deadline(
+        _bq_to_avro_blocks(bq_blocks_1, avro_schema),
+    )
+    bq_blocks_2 = [
+        [{"int_col": 567}, {"int_col": 789}],
+        [{"int_col": 890}],
+    ]
+    avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema)
+    mock_client.read_rows.return_value = avro_blocks_2
+    stream_position = bigquery_storage_v1beta1.types.StreamPosition(
+        stream={'name': 'test'},
+    )
+
+    reader = class_under_test(
+        avro_blocks_1,
+        mock_client,
+        stream_position,
+        {'metadata': {'test-key': 'test-value'}},
+    )
+    got = tuple(reader.rows(read_session))
+
+    expected = tuple(itertools.chain(
+        itertools.chain.from_iterable(bq_blocks_1),
+        itertools.chain.from_iterable(bq_blocks_2),
+    ))
+
+    assert got == expected
+    mock_client.read_rows.assert_called_once_with(
+        bigquery_storage_v1beta1.types.StreamPosition(
+            stream={'name': 'test'},
+            offset=4,
+        ),
+        metadata={'test-key': 'test-value'},
+    )
+
+
+def test_to_dataframe_no_pandas_raises_import_error(
+        mut, class_under_test, mock_client, monkeypatch):
+    monkeypatch.setattr(mut, 'pandas', None)
+    reader = class_under_test(
+        [],
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+    read_session = bigquery_storage_v1beta1.types.ReadSession()
+
+    with pytest.raises(ImportError):
+        reader.to_dataframe(read_session)
+
+
+def test_to_dataframe_no_fastavro_raises_import_error(
+        mut, class_under_test, mock_client, monkeypatch):
+    monkeypatch.setattr(mut, 'fastavro', None)
+    reader = class_under_test(
+        [],
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+    read_session = bigquery_storage_v1beta1.types.ReadSession()
+
+    with pytest.raises(ImportError):
+        reader.to_dataframe(read_session)
+
+
+def test_to_dataframe_w_scalars(class_under_test, mock_client):
+    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
+    read_session = _generate_read_session(avro_schema)
+    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
+
+    reader = class_under_test(
+        avro_blocks,
+        mock_client,
+        bigquery_storage_v1beta1.types.StreamPosition(),
+        {},
+    )
+    got = reader.to_dataframe(read_session)
+
+    expected = pandas.DataFrame(
+        list(itertools.chain.from_iterable(SCALAR_BLOCKS)),
+    )
+    # fastavro provides its own UTC definition, so
+    # compare the timestamp columns separately.
+    got_ts = got['ts_col']
+    got = got.drop(columns=['ts_col'])
+    expected_ts = expected['ts_col']
+    expected = expected.drop(columns=['ts_col'])
+
+    pandas.testing.assert_frame_equal(
+        got.reset_index(drop=True),  # reset_index to ignore row labels
+        expected.reset_index(drop=True),
+    )
+    pandas.testing.assert_series_equal(
+        got_ts.reset_index(drop=True),
+        expected_ts.reset_index(drop=True),
+        check_dtype=False,  # fastavro's UTC means different dtype
+        check_datetimelike_compat=True,
+    )
+
+
+def test_copy_stream_position(mut):
+    read_position = bigquery_storage_v1beta1.types.StreamPosition(
+        stream={"name": "test"}, offset=41
+    )
+    got = mut._copy_stream_position(read_position)
+    assert got == read_position
+    got.offset = 42
+    assert read_position.offset == 41
+
+
+def test_copy_stream_position_w_dict(mut):
+    read_position = {"stream": {"name": "test"}, "offset": 42}
+    got = mut._copy_stream_position(read_position)
+    assert got.stream.name == "test"
+    assert got.offset == 42
diff --git a/docs/conf.py b/docs/conf.py
index a396990cc621..63a5e4a840a2 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -321,7 +321,8 @@
     'google-gax': ('https://gax-python.readthedocs.io/en/latest/', None),
     'grpc': ('https://grpc.io/grpc/python/', None),
     'requests': ('http://docs.python-requests.org/en/master/', None),
-    'pandas': ('http://pandas.pydata.org/pandas-docs/stable/', None),
+    'fastavro': ('https://fastavro.readthedocs.io/en/stable/', None),
+    'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
     'python': ('https://docs.python.org/3', None),
 }