Skip to content

Commit

Permalink
feat: support BI Engine statistics in query job (#1144)
Browse files Browse the repository at this point in the history
* chore: Add support for accessing BI Engine statistics

The REST API returns BiEngineStatistics for a query, indicating whether the
query was accelerated by BI Engine. This commit adds the property needed to
access this information for executed queries.

* fix: Removed enums and replaced with string constants

* fix: Fixed logic for creating BIEngineStats and added test case

* Attempt at mypy fix

Co-authored-by: Tim Swast <[email protected]>
  • Loading branch information
anmolsahoo25 and tswast authored Feb 18, 2022
1 parent 39ade39 commit 7482549
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 0 deletions.
47 changes: 47 additions & 0 deletions google/cloud/bigquery/job/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,44 @@ def _to_api_repr_table_defs(value):
return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()}


class BiEngineReason(typing.NamedTuple):
    """Reason for BI Engine acceleration failure.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginereason
    """

    code: str = "CODE_UNSPECIFIED"
    """Reason code; ``"CODE_UNSPECIFIED"`` when the API resource has no ``code``."""

    reason: str = ""
    """Explanatory text, read from the ``message`` key of the API resource."""

    @classmethod
    def from_api_repr(cls, reason: Dict[str, str]) -> "BiEngineReason":
        """Build a ``BiEngineReason`` from its REST API representation.

        Args:
            reason: Mapping that may contain the ``code`` and ``message`` keys.
                Missing keys fall back to the field defaults.

        Returns:
            A new instance whose ``reason`` field holds the API ``message``.
        """
        # The API calls the text "message"; this tuple exposes it as "reason".
        return cls(
            code=reason.get("code", "CODE_UNSPECIFIED"),
            reason=reason.get("message", ""),
        )


class BiEngineStats(typing.NamedTuple):
    """Statistics for a BI Engine query.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginestatistics
    """

    mode: str = "ACCELERATION_MODE_UNSPECIFIED"
    """Specifies which mode of BI Engine acceleration was performed (if any)."""

    # NOTE: this default list is created once, together with the class, and is
    # shared by every instance constructed without ``reasons``. Treat it as
    # read-only; ``from_api_repr`` always builds a fresh list instead.
    reasons: List[BiEngineReason] = []
    """Contains explanatory messages in case of DISABLED / PARTIAL acceleration."""

    @classmethod
    def from_api_repr(cls, stats: Dict[str, Any]) -> "BiEngineStats":
        """Build a ``BiEngineStats`` from its REST API representation.

        Args:
            stats: Mapping that may contain ``biEngineMode`` and
                ``biEngineReasons`` (a list of reason mappings).

        Returns:
            A new instance. ``mode`` defaults to
            ``"ACCELERATION_MODE_UNSPECIFIED"`` and ``reasons`` to an empty
            list when the corresponding key is absent.
        """
        mode = stats.get("biEngineMode", "ACCELERATION_MODE_UNSPECIFIED")
        # ``or []`` covers both a missing key and an explicit null value, which
        # would otherwise make the comprehension below raise TypeError.
        raw_reasons = stats.get("biEngineReasons") or []
        reasons = [BiEngineReason.from_api_repr(raw) for raw in raw_reasons]
        return cls(mode, reasons)


class DmlStats(typing.NamedTuple):
"""Detailed statistics for DML statements.
Expand Down Expand Up @@ -1191,6 +1229,15 @@ def dml_stats(self) -> Optional[DmlStats]:
else:
return DmlStats.from_api_repr(stats)

@property
def bi_engine_stats(self) -> Optional[BiEngineStats]:
    """BI Engine statistics recorded for this query job, if any.

    Returns:
        ``None`` when the job statistics have no ``biEngineStatistics``
        entry; otherwise a ``BiEngineStats`` built from that entry.
    """
    stats = self._job_statistics().get("biEngineStatistics")
    if stats is None:
        return None
    return BiEngineStats.from_api_repr(stats)

def _blocking_poll(self, timeout=None, **kwargs):
self._done_timeout = timeout
self._transport_timeout = timeout
Expand Down
17 changes: 17 additions & 0 deletions tests/unit/job/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,23 @@ def test_estimated_bytes_processed(self):
query_stats["estimatedBytesProcessed"] = str(est_bytes)
self.assertEqual(job.estimated_bytes_processed, est_bytes)

def test_bi_engine_stats(self):
    """``bi_engine_stats`` is None until ``biEngineStatistics`` is populated."""
    from google.cloud.bigquery.job.query import BiEngineStats

    client = _make_client(project=self.PROJECT)
    job = self._make_one(self.JOB_ID, self.QUERY, client)
    # No "statistics" entry has been set on the job yet.
    assert job.bi_engine_stats is None

    # Empty "statistics" mapping.
    statistics = job._properties["statistics"] = {}
    assert job.bi_engine_stats is None

    # "query" statistics present but without BI Engine data.
    query_stats = statistics["query"] = {}
    assert job.bi_engine_stats is None

    query_stats["biEngineStatistics"] = {"biEngineMode": "FULL"}
    stats = job.bi_engine_stats
    assert isinstance(stats, BiEngineStats)
    assert stats.mode == "FULL"
    # No "biEngineReasons" key was supplied, so the list must be empty.
    assert stats.reasons == []

def test_dml_stats(self):
from google.cloud.bigquery.job.query import DmlStats

Expand Down
56 changes: 56 additions & 0 deletions tests/unit/job/test_query_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,62 @@
from .helpers import _Base


class TestBiEngineStats:
    """Unit tests for ``google.cloud.bigquery.job.query.BiEngineStats``."""

    @staticmethod
    def _get_target_class():
        # Imported lazily so that collecting this module does not import the
        # class under test.
        from google.cloud.bigquery.job.query import BiEngineStats

        return BiEngineStats

    def _make_one(self, *args, **kw):
        """Instantiate the class under test with the given arguments."""
        return self._get_target_class()(*args, **kw)

    def test_ctor_defaults(self):
        """A bare constructor call yields the unspecified mode and no reasons."""
        bi_engine_stats = self._make_one()
        assert bi_engine_stats.mode == "ACCELERATION_MODE_UNSPECIFIED"
        assert bi_engine_stats.reasons == []

    def test_from_api_repr_unspecified(self):
        """An explicit unspecified mode is passed through unchanged."""
        klass = self._get_target_class()
        result = klass.from_api_repr({"biEngineMode": "ACCELERATION_MODE_UNSPECIFIED"})

        assert isinstance(result, klass)
        assert result.mode == "ACCELERATION_MODE_UNSPECIFIED"
        assert result.reasons == []

    def test_from_api_repr_full(self):
        """A FULL acceleration resource carries no reasons."""
        klass = self._get_target_class()
        result = klass.from_api_repr({"biEngineMode": "FULL"})

        assert isinstance(result, klass)
        assert result.mode == "FULL"
        assert result.reasons == []

    def test_from_api_repr_disabled(self):
        """A DISABLED resource exposes each reason's code and message."""
        klass = self._get_target_class()
        result = klass.from_api_repr(
            {
                "biEngineMode": "DISABLED",
                "biEngineReasons": [
                    {
                        "code": "OTHER_REASON",
                        "message": "Unable to support input table xyz due to an internal error.",
                    }
                ],
            }
        )

        assert isinstance(result, klass)
        assert result.mode == "DISABLED"

        # Exactly the one reason supplied above should come back.
        assert len(result.reasons) == 1
        reason = result.reasons[0]
        assert reason.code == "OTHER_REASON"
        assert (
            reason.reason
            == "Unable to support input table xyz due to an internal error."
        )


class TestDmlStats:
@staticmethod
def _get_target_class():
Expand Down

0 comments on commit 7482549

Please sign in to comment.