Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(bigquery): Add Job#parent_job_id and Job#script_statistics #4926

Merged
merged 5 commits into from
Mar 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions google-cloud-bigquery/acceptance/bigquery/advanced_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,66 @@
assert_equal -Float::INFINITY, row[:negative_infinity]
end

it "executes SQL with multiple statements and creates child jobs with script_statistics" do
# Script with three statements (DECLARE, SET, SELECT). The stack-frame line/column expectations
# further down are positions inside this heredoc, so any edit to the SQL text must be mirrored there.
multi_statement_sql = <<~SQL
-- Declare a variable to hold names as an array.
DECLARE top_names ARRAY<STRING>;
-- Build an array of the top 100 names from the year 2017.
SET top_names = (
SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
FROM `bigquery-public-data.usa_names.usa_1910_current`
WHERE year = 2017
);
-- Which names appear as words in Shakespeare's plays?
SELECT
name AS shakespeare_name
FROM UNNEST(top_names) AS name
WHERE name IN (
SELECT word
FROM `bigquery-public-data.samples.shakespeare`
);
SQL
quartzmo marked this conversation as resolved.
Show resolved Hide resolved

job = bigquery.query_job multi_statement_sql

# The submitted script job is the parent: it must finish without failing, report two child jobs,
# and carry neither a parent job ID nor script statistics of its own.
job.must_be_kind_of Google::Cloud::Bigquery::QueryJob
job.wait_until_done!
job.wont_be :failed?
job.num_child_jobs.must_equal 2
job.parent_job_id.must_be :nil?

job.script_statistics.must_be :nil?

# Listing jobs filtered by the parent must return exactly the two child jobs.
child_jobs = bigquery.jobs parent_job: job
child_jobs.count.must_equal 2

# First listed child: the final SELECT statement, which occupies script lines 10-16 of the heredoc.
child_jobs[0].parent_job_id.must_equal job.job_id
child_jobs[0].script_statistics.must_be_kind_of Google::Cloud::Bigquery::Job::ScriptStatistics
child_jobs[0].script_statistics.evaluation_kind.must_equal "STATEMENT"
child_jobs[0].script_statistics.stack_frames.wont_be :nil?
child_jobs[0].script_statistics.stack_frames.must_be_kind_of Array
child_jobs[0].script_statistics.stack_frames.count.must_equal 1
child_jobs[0].script_statistics.stack_frames[0].must_be_kind_of Google::Cloud::Bigquery::Job::ScriptStackFrame
child_jobs[0].script_statistics.stack_frames[0].start_line.must_equal 10
child_jobs[0].script_statistics.stack_frames[0].start_column.must_equal 1
child_jobs[0].script_statistics.stack_frames[0].end_line.must_equal 16
child_jobs[0].script_statistics.stack_frames[0].end_column.must_equal 2
child_jobs[0].script_statistics.stack_frames[0].text.length.must_be :>, 0

# Second listed child: the parenthesized subquery assigned by SET, which starts at line 4, column 17
# (right after "SET top_names = ") and ends on line 8.
child_jobs[1].parent_job_id.must_equal job.job_id
child_jobs[1].script_statistics.must_be_kind_of Google::Cloud::Bigquery::Job::ScriptStatistics
child_jobs[1].script_statistics.evaluation_kind.must_equal "EXPRESSION"
child_jobs[1].script_statistics.stack_frames.wont_be :nil?
child_jobs[1].script_statistics.stack_frames.must_be_kind_of Array
child_jobs[1].script_statistics.stack_frames.count.must_equal 1
child_jobs[1].script_statistics.stack_frames[0].must_be_kind_of Google::Cloud::Bigquery::Job::ScriptStackFrame
child_jobs[1].script_statistics.stack_frames[0].start_line.must_equal 4
child_jobs[1].script_statistics.stack_frames[0].start_column.must_equal 17
child_jobs[1].script_statistics.stack_frames[0].end_line.must_equal 8
child_jobs[1].script_statistics.stack_frames[0].end_column.must_equal 2
child_jobs[1].script_statistics.stack_frames[0].text.length.must_be :>, 0
end

def assert_rows_equal returned_row, example_row
returned_row[:id].must_equal example_row[:id]
returned_row[:name].must_equal example_row[:name]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@
job.must_be_kind_of Google::Cloud::Bigquery::QueryJob
job.wait_until_done!
job.wont_be :failed?
job.num_child_jobs.must_equal 0
job.parent_job_id.must_be :nil?

job.time_partitioning_type.must_equal "DAY"
job.time_partitioning_field.must_equal "dob"
Expand Down
198 changes: 198 additions & 0 deletions google-cloud-bigquery/lib/google/cloud/bigquery/job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,72 @@ def ended_at
Convert.millis_to_time @gapi.statistics.end_time
end

##
# The number of child jobs executed.
#
# @return [Integer] The number of child jobs executed.
#
def num_child_jobs
  # A missing (nil) count in the job statistics is reported as zero.
  child_count = @gapi.statistics.num_child_jobs
  return 0 unless child_count

  child_count
end

##
# If this is a child job, the id of the parent.
#
# @return [String, nil] The ID of the parent job, or `nil` if not a child job.
#
def parent_job_id
  # Read straight from the job statistics; the value is nil when no parent ID is recorded.
  job_statistics = @gapi.statistics
  job_statistics.parent_job_id
end

##
# The statistics including stack frames for a child job of a script.
#
# @return [Google::Cloud::Bigquery::Job::ScriptStatistics, nil] The script statistics, or `nil` if the job is
# not a child job.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# multi_statement_sql = <<~SQL
# -- Declare a variable to hold names as an array.
# DECLARE top_names ARRAY<STRING>;
# -- Build an array of the top 100 names from the year 2017.
# SET top_names = (
# SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
# FROM `bigquery-public-data.usa_names.usa_1910_current`
# WHERE year = 2017
# );
# -- Which names appear as words in Shakespeare's plays?
# SELECT
# name AS shakespeare_name
# FROM UNNEST(top_names) AS name
# WHERE name IN (
# SELECT word
# FROM `bigquery-public-data.samples.shakespeare`
# );
# SQL
#
# job = bigquery.query_job multi_statement_sql
#
# job.wait_until_done!
#
# child_jobs = bigquery.jobs parent_job: job
#
# child_jobs.each do |child_job|
# script_statistics = child_job.script_statistics
# puts script_statistics.evaluation_kind
# script_statistics.stack_frames.each do |stack_frame|
# puts stack_frame.text
# end
# end
#
def script_statistics
  # Wrap the raw statistics only when they are present; otherwise return nil.
  raw_script_statistics = @gapi.statistics.script_statistics
  return nil unless raw_script_statistics

  ScriptStatistics.from_gapi raw_script_statistics
end

##
# The configuration for the job. Returns a hash.
#
Expand Down Expand Up @@ -423,6 +489,138 @@ def self.klass_for gapi
end
end

##
# Represents statistics for a child job of a script.
#
# @attr_reader [String] evaluation_kind Indicates the type of child job. Possible values include `STATEMENT` and
# `EXPRESSION`.
# @attr_reader [Array<Google::Cloud::Bigquery::Job::ScriptStackFrame>] stack_frames Stack trace where the
# current evaluation happened. Shows line/column/procedure name of each frame on the stack at the point where
# the current evaluation happened. The leaf frame is first, the primary script is last.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# multi_statement_sql = <<~SQL
# -- Declare a variable to hold names as an array.
# DECLARE top_names ARRAY<STRING>;
# -- Build an array of the top 100 names from the year 2017.
# SET top_names = (
# SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
# FROM `bigquery-public-data.usa_names.usa_1910_current`
# WHERE year = 2017
# );
# -- Which names appear as words in Shakespeare's plays?
# SELECT
# name AS shakespeare_name
# FROM UNNEST(top_names) AS name
# WHERE name IN (
# SELECT word
# FROM `bigquery-public-data.samples.shakespeare`
# );
# SQL
#
# job = bigquery.query_job multi_statement_sql
#
# job.wait_until_done!
#
# child_jobs = bigquery.jobs parent_job: job
#
# child_jobs.each do |child_job|
# script_statistics = child_job.script_statistics
# puts script_statistics.evaluation_kind
# script_statistics.stack_frames.each do |stack_frame|
# puts stack_frame.text
# end
# end
#
class ScriptStatistics
  # Plain read-only value holder: the evaluation kind string plus the list of stack frames.

  attr_reader :evaluation_kind
  attr_reader :stack_frames

  ##
  # @private Creates a new ScriptStatistics instance from already-converted values.
  def initialize evaluation_kind, stack_frames
    @evaluation_kind = evaluation_kind
    @stack_frames = stack_frames
  end

  ##
  # @private Builds a ScriptStatistics from a statistics.script_statistics object.
  # A nil `stack_frames` on the source object yields an empty array of frames.
  def self.from_gapi gapi
    converted_frames = Array(gapi.stack_frames).map do |frame_gapi|
      ScriptStackFrame.from_gapi frame_gapi
    end
    new gapi.evaluation_kind, converted_frames
  end
end

##
# Represents a stack frame showing the line/column/procedure name where the current evaluation happened.
#
# @attr_reader [Integer] start_line One-based start line.
# @attr_reader [Integer] start_column One-based start column.
# @attr_reader [Integer] end_line One-based end line.
# @attr_reader [Integer] end_column One-based end column.
# @attr_reader [String] text Text of the current statement/expression.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# multi_statement_sql = <<~SQL
# -- Declare a variable to hold names as an array.
# DECLARE top_names ARRAY<STRING>;
# -- Build an array of the top 100 names from the year 2017.
# SET top_names = (
# SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
# FROM `bigquery-public-data.usa_names.usa_1910_current`
# WHERE year = 2017
# );
# -- Which names appear as words in Shakespeare's plays?
# SELECT
# name AS shakespeare_name
# FROM UNNEST(top_names) AS name
# WHERE name IN (
# SELECT word
# FROM `bigquery-public-data.samples.shakespeare`
# );
# SQL
#
# job = bigquery.query_job multi_statement_sql
#
# job.wait_until_done!
#
# child_jobs = bigquery.jobs parent_job: job
#
# child_jobs.each do |child_job|
# script_statistics = child_job.script_statistics
# puts script_statistics.evaluation_kind
# script_statistics.stack_frames.each do |stack_frame|
# puts stack_frame.text
# end
# end
#
class ScriptStackFrame
  # Plain read-only value holder for one frame: its start/end position and the evaluated text.

  attr_reader :start_line
  attr_reader :start_column
  attr_reader :end_line
  attr_reader :end_column
  attr_reader :text

  ##
  # @private Creates a new ScriptStackFrame instance from the five positional values.
  def initialize start_line, start_column, end_line, end_column, text
    @start_line   = start_line
    @start_column = start_column
    @end_line     = end_line
    @end_column   = end_column
    @text         = text
  end

  ##
  # @private Builds a ScriptStackFrame by copying the fields of one
  # statistics.script_statistics[].stack_frames element.
  def self.from_gapi gapi
    new(
      gapi.start_line,
      gapi.start_column,
      gapi.end_line,
      gapi.end_column,
      gapi.text
    )
  end
end

protected

##
Expand Down
78 changes: 66 additions & 12 deletions google-cloud-bigquery/lib/google/cloud/bigquery/project.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1084,18 +1084,22 @@ def job job_id, location: nil
# part of the larger set of results to view. Optional.
# @param [Integer] max Maximum number of jobs to return. Optional.
# @param [String] filter A filter for job state. Optional.
# @param [Time] min_created_at Min value for {Job#created_at}. When
# provided, only jobs created after or at this time are returned.
# Optional.
# @param [Time] max_created_at Max value for {Job#created_at}. When
# provided, only jobs created before or at this time are returned.
# Optional.
#
# Acceptable values are:
#
# * `done` - Finished jobs
# * `pending` - Pending jobs
# * `running` - Running jobs
# @param [Time] min_created_at Min value for {Job#created_at}. When
# provided, only jobs created after or at this time are returned.
# Optional.
# @param [Time] max_created_at Max value for {Job#created_at}. When
# provided, only jobs created before or at this time are returned.
# Optional.
# @param [Google::Cloud::Bigquery::Job, String] parent_job A job
# object or a job ID. If set, retrieve only child jobs of the
# specified parent. Optional. See {Job#job_id}, {Job#num_child_jobs},
# and {Job#parent_job_id}.
#
# @return [Array<Google::Cloud::Bigquery::Job>] (See
# {Google::Cloud::Bigquery::Job::List})
Expand Down Expand Up @@ -1144,13 +1148,63 @@ def job job_id, location: nil
# # process job
# end
#
def jobs all: nil, token: nil, max: nil, filter: nil,
min_created_at: nil, max_created_at: nil
# @example Retrieve child jobs by setting `parent_job`:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# multi_statement_sql = <<~SQL
# -- Declare a variable to hold names as an array.
# DECLARE top_names ARRAY<STRING>;
# -- Build an array of the top 100 names from the year 2017.
# SET top_names = (
# SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
# FROM `bigquery-public-data.usa_names.usa_1910_current`
# WHERE year = 2017
# );
# -- Which names appear as words in Shakespeare's plays?
# SELECT
# name AS shakespeare_name
# FROM UNNEST(top_names) AS name
# WHERE name IN (
# SELECT word
# FROM `bigquery-public-data.samples.shakespeare`
# );
# SQL
#
# job = bigquery.query_job multi_statement_sql
#
# job.wait_until_done!
#
# child_jobs = bigquery.jobs parent_job: job
#
# child_jobs.each do |child_job|
# script_statistics = child_job.script_statistics
# puts script_statistics.evaluation_kind
# script_statistics.stack_frames.each do |stack_frame|
# puts stack_frame.text
# end
# end
#
def jobs all: nil,
token: nil,
max: nil,
filter: nil,
min_created_at: nil,
max_created_at: nil,
parent_job: nil
ensure_service!
# NOTE(review): the next four statements look like the pre-change body carried over from the diff view
# (positional options hash, no parent_job_id); they are superseded by the statements below — confirm.
options = { all: all, token: token, max: max, filter: filter, min_created_at: min_created_at,
max_created_at: max_created_at }
gapi = service.list_jobs options
Job::List.from_gapi gapi, service, options
# parent_job may be a Job object or a job ID; a Job is reduced to its job_id, anything else is used as given.
parent_job = parent_job.job_id if parent_job.is_a? Job
options = {
parent_job_id: parent_job,
all: all,
token: token,
max: max, filter: filter,
min_created_at: min_created_at,
max_created_at: max_created_at
}
# The same keyword options go to both the service call and the resulting list —
# presumably so later pages reuse the same filters; verify against Job::List.
gapi = service.list_jobs(**options)
Job::List.from_gapi gapi, service, **options
end

##
Expand Down
Loading