-
Notifications
You must be signed in to change notification settings - Fork 306
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: avoid policy tags 403 error in load_table_from_dataframe
#557
Changes from all commits
1f6e6d8
d4b6d32
3cdbcc7
fe1029d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
from google.cloud.bigquery_v2 import types | ||
|
||
|
||
_DEFAULT_VALUE = object() | ||
_STRUCT_TYPES = ("RECORD", "STRUCT") | ||
|
||
# SQL types reference: | ||
|
@@ -73,14 +74,18 @@ def __init__( | |
name, | ||
field_type, | ||
mode="NULLABLE", | ||
description=None, | ||
description=_DEFAULT_VALUE, | ||
fields=(), | ||
policy_tags=None, | ||
): | ||
self._name = name | ||
self._field_type = field_type | ||
self._mode = mode | ||
self._description = description | ||
self._properties = { | ||
"name": name, | ||
"type": field_type, | ||
} | ||
if mode is not None: | ||
self._properties["mode"] = mode.upper() | ||
if description is not _DEFAULT_VALUE: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @shollyman This is one of the key changes: we no longer set the resource value for "description" if it's not explicitly set. We already omit [remainder of this comment, beginning with an inline code span, was lost in extraction] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. My default inclination would be for special handling for None values to happen at the places where it's significant, like when calling tables.update. It's also the case that schema fields can't be manipulated individually, so perhaps I'm simply just not thinking this through properly. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I called that out as a possibility in #558, but that'd require updating our field mask logic to support sub-fields, which gets into some hairy string parsing (perhaps not all that hairy, as it could be as simple as split on [inline code span lost in extraction]). Also, it might mean that we'd have to introduce a field mask to our load job methods. Based on the error message we're seeing, it sounds like it's possible to make updates to fields like policy tags from a load job. |
||
self._properties["description"] = description | ||
self._fields = tuple(fields) | ||
self._policy_tags = policy_tags | ||
|
||
|
@@ -98,7 +103,7 @@ def from_api_repr(cls, api_repr): | |
""" | ||
# Handle optional properties with default values | ||
mode = api_repr.get("mode", "NULLABLE") | ||
description = api_repr.get("description") | ||
description = api_repr.get("description", _DEFAULT_VALUE) | ||
fields = api_repr.get("fields", ()) | ||
|
||
return cls( | ||
|
@@ -113,7 +118,7 @@ def from_api_repr(cls, api_repr): | |
@property | ||
def name(self): | ||
"""str: The name of the field.""" | ||
return self._name | ||
return self._properties["name"] | ||
|
||
@property | ||
def field_type(self): | ||
|
@@ -122,7 +127,7 @@ def field_type(self): | |
See: | ||
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type | ||
""" | ||
return self._field_type | ||
return self._properties["type"] | ||
|
||
@property | ||
def mode(self): | ||
|
@@ -131,17 +136,17 @@ def mode(self): | |
See: | ||
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode | ||
""" | ||
return self._mode | ||
return self._properties.get("mode") | ||
|
||
@property | ||
def is_nullable(self): | ||
"""bool: whether 'mode' is 'nullable'.""" | ||
return self._mode == "NULLABLE" | ||
return self.mode == "NULLABLE" | ||
|
||
@property | ||
def description(self): | ||
"""Optional[str]: description for the field.""" | ||
return self._description | ||
return self._properties.get("description") | ||
|
||
@property | ||
def fields(self): | ||
|
@@ -164,13 +169,7 @@ def to_api_repr(self): | |
Returns: | ||
Dict: A dictionary representing the SchemaField in a serialized form. | ||
""" | ||
# Put together the basic representation. See http://bit.ly/2hOAT5u. | ||
answer = { | ||
"mode": self.mode.upper(), | ||
"name": self.name, | ||
"type": self.field_type.upper(), | ||
"description": self.description, | ||
} | ||
answer = self._properties.copy() | ||
|
||
# If this is a RECORD type, then sub-fields are also included, | ||
# add this to the serialized representation. | ||
|
@@ -193,10 +192,10 @@ def _key(self): | |
Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. | ||
""" | ||
return ( | ||
self._name, | ||
self._field_type.upper(), | ||
self._mode.upper(), | ||
self._description, | ||
self.name, | ||
self.field_type.upper(), | ||
self.mode.upper(), | ||
self.description, | ||
self._fields, | ||
self._policy_tags, | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the actual bug fix. Rather than populate all properties of the schema field from the table schema, just populate the minimum we need to convert to parquet/CSV and then upload.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We'll need to revisit this for parameterization constraints, but that's a problem for future Tim.