Skip to content

Commit

Permalink
sql: display the nr of server-side auto retries in exec/audit logs
Browse files Browse the repository at this point in the history
As originally implemented, the logic that produces the statement
execution logs and the audit logs produces one line of logging for
every statement *executed by the server*. If a statement is retried
automatically, there is one line of log per retry attempt.

This level of detail is desirable for troubleshooting because it
enables to analyze different things:

- the details of the transaction during the retry failures (the text
  of the retry error contains useful troubleshooting details).
- how long each individual attempt lasted (in the latency column);
- if/when the number of rows affected changes (in the rows affected
  column; this is needed for auditing).

However, this amount of detail introduces an element of surprise: a
user who does not understand how to distinguish "work sent by the
client" and "work executed by the server" (because, for example,
traditional databases usually conflate the two), may be surprised
by this behavior.

For example, to count how many stastements were sent by a client,
the user would need to be taught how to ignore those log entries
that report a retry error, and possibly be taught about the internals
of the execution machinery. This amount of effort is indesirable.

To alleviate this effort, and generally increase the readability of
the statement log files, this patch introduces an additional column at
the end of each log line, containing the current value of the
auto-retry counter.

With this patch implemented, all the log lines with this value set to
zero correspond to the work that was sent by a client, prior to any
retry attempt.

Release note (sql change): The logging format for statement
execution (as enabled by `sql.trace.log_statement_execute`) and audit
logs (as enabled by `ALTER TABLE .. EXPERIMENTAL AUDIT SET ...`) is
extended with one column that reports internal execution retries
performed automatically by the server. The value zero is logged for
the first retry attempt and thus corresponds to statements directly
receivd from a client.
  • Loading branch information
knz committed Jun 5, 2019
1 parent 93a1fdc commit c981058
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 16 deletions.
4 changes: 3 additions & 1 deletion pkg/sql/conn_executor_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,9 @@ func (ex *connExecutor) dispatchToExecutionEngine(
}
}

defer func() { planner.maybeLogStatement(ctx, "exec", res.RowsAffected(), res.Err()) }()
defer func() {
planner.maybeLogStatement(ctx, "exec", ex.extraTxnState.autoRetryCounter, res.RowsAffected(), res.Err())
}()

planner.statsCollector.PhaseTimes()[plannerEndLogicalPlan] = timeutil.Now()
ex.sessionTracing.TracePlanEnd(ctx, err)
Expand Down
34 changes: 19 additions & 15 deletions pkg/sql/exec_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ import (
// The log format is currently as follows:
//
// Example audit log line:
// I180211 07:30:48.832004 317 sql/exec_log.go:90 [client=127.0.0.1:62503,user=root,n1] 13 exec "cockroach" {"ab"[53]:READ} "SELECT * FROM ab" {} 123.45 12 OK
// I180211 07:30:48.832004 317 sql/exec_log.go:90 [client=127.0.0.1:62503,user=root,n1] 13 exec "cockroach" {"ab"[53]:READ} "SELECT nonexistent FROM ab" {} 0.123 12 ERROR
// I180211 07:30:48.832004 317 sql/exec_log.go:90 [client=127.0.0.1:62503,user=root,n1] 13 exec "cockroach" {"ab"[53]:READ} "SELECT * FROM ab" {} 123.45 12 OK 0
// I180211 07:30:48.832004 317 sql/exec_log.go:90 [client=127.0.0.1:62503,user=root,n1] 13 exec "cockroach" {"ab"[53]:READ} "SELECT nonexistent FROM ab" {} 0.123 12 ERROR 0
// Example execution log:
// I180211 07:30:48.832004 317 sql/exec_log.go:90 [client=127.0.0.1:62503,user=root,n1] 13 exec "cockroach" {} "SELECT * FROM ab" {} 123.45 12 OK
// I180211 07:30:48.832004 317 sql/exec_log.go:90 [client=127.0.0.1:62503,user=root,n1] 13 exec "cockroach" {} "SELECT nonexistent FROM ab" {} 0.123 0 "column \"nonexistent\" not found"
// I180211 07:30:48.832004 317 sql/exec_log.go:90 [client=127.0.0.1:62503,user=root,n1] 13 exec "cockroach" {} "SELECT * FROM ab" {} 123.45 12 OK 0
// I180211 07:30:48.832004 317 sql/exec_log.go:90 [client=127.0.0.1:62503,user=root,n1] 13 exec "cockroach" {} "SELECT nonexistent FROM ab" {} 0.123 0 "column \"nonexistent\" not found" 0
//
// Explanation of fields:
// I180211 07:30:48.832004 317 sql/exec_log.go:90 [client=127.0.0.1:62503,user=root,n1] 13 exec "cockroach" {"ab"[53]:READ} "SELECT nonexistent FROM ab" {} 0.123 12 ERROR
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
// I180211 07:30:48.832004 317 sql/exec_log.go:90 [client=127.0.0.1:62503,user=root,n1] 13 exec "cockroach" {"ab"[53]:READ} "SELECT nonexistent FROM ab" {} 0.123 12 ERROR 0
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
// \ .../
// '- prefix generated by CockroachDB's standard log package. Contains:
//
Expand Down Expand Up @@ -79,6 +79,8 @@ import (
// - the number of rows that were produced. For troubleshooting.
// - the status of the query (OK for success, ERROR or full error
// message upon error). Needed for auditing and troubleshooting.
// - the number of times the statement was retried automatically
// by the server so far.

// logStatementsExecuteEnabled causes the Executor to log executed
// statements and, if any, resulting errors.
Expand All @@ -90,13 +92,15 @@ var logStatementsExecuteEnabled = settings.RegisterBoolSetting(

// maybeLogStatement conditionally records the current statement
// (p.curPlan) to the exec / audit logs.
func (p *planner) maybeLogStatement(ctx context.Context, lbl string, rows int, err error) {
func (p *planner) maybeLogStatement(
ctx context.Context, lbl string, numRetries, rows int, err error,
) {
p.maybeLogStatementInternal(
ctx, lbl, rows, err, p.statsCollector.PhaseTimes()[sessionQueryReceived])
ctx, lbl, numRetries, rows, err, p.statsCollector.PhaseTimes()[sessionQueryReceived])
}

func (p *planner) maybeLogStatementInternal(
ctx context.Context, lbl string, rows int, err error, startTime time.Time,
ctx context.Context, lbl string, numRetries, rows int, err error, startTime time.Time,
) {
// Note: if you find the code below crashing because p.execCfg == nil,
// do not add a test "if p.execCfg == nil { do nothing }" !
Expand Down Expand Up @@ -152,18 +156,18 @@ func (p *planner) maybeLogStatementInternal(
// Now log!
if auditEventsDetected {
logger := p.execCfg.AuditLogger
logger.Logf(ctx, "%s %q %s %q %s %.3f %d %s",
lbl, appName, logTrigger, stmtStr, plStr, age, rows, auditErrStr)
logger.Logf(ctx, "%s %q %s %q %s %.3f %d %s %d",
lbl, appName, logTrigger, stmtStr, plStr, age, rows, auditErrStr, numRetries)
}
if logExecuteEnabled {
logger := p.execCfg.ExecLogger
logger.Logf(ctx, "%s %q %s %q %s %.3f %d %q",
lbl, appName, logTrigger, stmtStr, plStr, age, rows, execErrStr)
logger.Logf(ctx, "%s %q %s %q %s %.3f %d %q %d",
lbl, appName, logTrigger, stmtStr, plStr, age, rows, execErrStr, numRetries)
}
if logV {
// Copy to the main log.
log.VEventf(ctx, 2, "%s %q %s %q %s %.3f %d %q",
lbl, appName, logTrigger, stmtStr, plStr, age, rows, execErrStr)
log.VEventf(ctx, 2, "%s %q %s %q %s %.3f %d %q %d",
lbl, appName, logTrigger, stmtStr, plStr, age, rows, execErrStr, numRetries)
}
}

Expand Down

0 comments on commit c981058

Please sign in to comment.