Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sentry: panic due to decoding unset EncDatum #36834

Closed
cockroach-teamcity opened this issue Apr 15, 2019 · 2 comments
Closed

sentry: panic due to decoding unset EncDatum #36834

cockroach-teamcity opened this issue Apr 15, 2019 · 2 comments
Labels
C-bug Code not up to spec/doc, specs & docs deemed correct. Solution expected to change code/behavior. O-sentry Originated from an in-the-wild panic report.

Comments

@cockroach-teamcity
Copy link
Member

This issue was autofiled by Sentry. It represents a crash or reported error on a live cluster with telemetry enabled.

Sentry link: https://sentry.io/organizations/cockroach-labs/issues/986216120/?project=164528&referrer=webhooks_plugin

Panic message:

conn_executor.go:645: panic while executing 1 statements: SELECT _, _ FROM _._._ WHERE ((_ IS _) AND (_ = $1)) AND (_ = $2) ORDER BY _ DESC LIMIT $3: caused by <redacted>

Stacktrace (expand for inline code snippets):

r := recover()
h.ex.closeWrapper(ctx, r)
}()
in pkg/sql.(*Server).ServeConn.func1
/usr/local/go/src/runtime/asm_amd64.s#L572-L574 in runtime.call32
/usr/local/go/src/runtime/panic.go#L501-L503 in runtime.gopanic
if ed.encoded == nil {
panic("decoding unset EncDatum")
}
in pkg/sql/sqlbase.(*EncDatum).EnsureDecoded
func (eh *exprHelper) IndexedVarEval(idx int, ctx *tree.EvalContext) (tree.Datum, error) {
err := eh.row[idx].EnsureDecoded(&eh.types[idx], &eh.datumAlloc)
if err != nil {
in pkg/sql/distsqlrun.(*exprHelper).IndexedVarEval
}
return ctx.IVarContainer.IndexedVarEval(v.Idx, ctx)
}
in pkg/sql/sem/tree.(*IndexedVar).Eval
func (expr *ComparisonExpr) Eval(ctx *EvalContext) (Datum, error) {
left, err := expr.Left.(TypedExpr).Eval(ctx)
if err != nil {
in pkg/sql/sem/tree.(*ComparisonExpr).Eval
d, err := filter.Eval(evalCtx)
if err != nil {
in pkg/sql/sqlbase.RunFilter
eh.evalCtx.PushIVarContainer(eh)
pass, err := sqlbase.RunFilter(eh.expr, eh.evalCtx)
eh.evalCtx.PopIVarContainer()
in pkg/sql/distsqlrun.(*exprHelper).evalFilter
// Filtering.
passes, err := h.filter.evalFilter(row)
if err != nil {
in pkg/sql/distsqlrun.(*ProcOutputHelper).ProcessRow
func (pb *ProcessorBase) ProcessRowHelper(row sqlbase.EncDatumRow) sqlbase.EncDatumRow {
outRow, ok, err := pb.out.ProcessRow(pb.Ctx, row)
if err != nil {
in pkg/sql/distsqlrun.(*ProcessorBase).ProcessRowHelper
if outRow := tr.ProcessRowHelper(row); outRow != nil {
return outRow, nil
in pkg/sql/distsqlrun.(*tableReader).Next
for {
row, meta := src.Next()
// Emit the row; stop if no more rows are needed.
in pkg/sql/distsqlrun.Run
ctx = pb.self.Start(ctx)
Run(ctx, pb.self, pb.out.output)
if wg != nil {
in pkg/sql/distsqlrun.(*ProcessorBase).Run
if len(f.processors) > 0 {
f.processors[len(f.processors)-1].Run(ctx, nil)
}
in pkg/sql/distsqlrun.(*Flow).StartSync
// TODO(radu): this should go through the flow scheduler.
if err := flow.StartSync(ctx, func() {}); err != nil {
log.Fatalf(ctx, "unexpected error from syncFlow.Start(): %s "+
in pkg/sql.(*DistSQLPlanner).Run
dsp.FinalizePlan(planCtx, &physPlan)
dsp.Run(planCtx, txn, &physPlan, recv, evalCtx, nil /* finishedSetupFn */)
}
in pkg/sql.(*DistSQLPlanner).PlanAndRun
// the planner whether or not to plan remote table readers.
ex.server.cfg.DistSQLPlanner.PlanAndRun(
ctx, evalCtx, &planCtx, planner.txn, planner.curPlan.plan, recv)
in pkg/sql.(*connExecutor).execWithDistSQLEngine
ex.sessionTracing.TraceExecStart(ctx, "distributed")
err = ex.execWithDistSQLEngine(ctx, planner, stmt.AST.StatementType(), res, distributePlan)
} else {
in pkg/sql.(*connExecutor).dispatchToExecutionEngine
p.autoCommit = os.ImplicitTxn.Get() && !ex.server.cfg.TestingKnobs.DisableAutoCommit
if err := ex.dispatchToExecutionEngine(ctx, stmt, p, res); err != nil {
return nil, nil, err
in pkg/sql.(*connExecutor).execStmtInOpenState
case stateOpen:
ev, payload, err = ex.execStmtInOpenState(ctx, stmt, pinfo, res)
switch ev.(type) {
in pkg/sql.(*connExecutor).execStmt
ctx := withStatement(ex.Ctx(), ex.curStmt)
ev, payload, err = ex.execStmt(ctx, curStmt, stmtRes, pinfo, pos)
if err != nil {
in pkg/sql.(*connExecutor).run
}()
return h.ex.run(ctx, s.pool, reserved, cancel)
}
in pkg/sql.(*Server).ServeConn
go func() {
writerErr = sqlServer.ServeConn(ctx, connHandler, reserved, cancelConn)
// TODO(andrei): Should we sometimes transmit the writerErr's to the
in pkg/sql/pgwire.(*conn).serveImpl.func4

pkg/sql/conn_executor.go in pkg/sql.(*Server).ServeConn.func1 at line 387
/usr/local/go/src/runtime/asm_amd64.s in runtime.call32 at line 573
/usr/local/go/src/runtime/panic.go in runtime.gopanic at line 502
pkg/sql/sqlbase/encoded_datum.go in pkg/sql/sqlbase.(*EncDatum).EnsureDecoded at line 210
pkg/sql/distsqlrun/expr.go in pkg/sql/distsqlrun.(*exprHelper).IndexedVarEval at line 132
pkg/sql/sem/tree/indexed_vars.go in pkg/sql/sem/tree.(*IndexedVar).Eval at line 80
pkg/sql/sem/tree/eval.go in pkg/sql/sem/tree.(*ComparisonExpr).Eval at line 3342
pkg/sql/sqlbase/expr_filter.go in pkg/sql/sqlbase.RunFilter at line 26
pkg/sql/distsqlrun/expr.go in pkg/sql/distsqlrun.(*exprHelper).evalFilter at line 172
pkg/sql/distsqlrun/processors.go in pkg/sql/distsqlrun.(*ProcOutputHelper).ProcessRow at line 331
pkg/sql/distsqlrun/processors.go in pkg/sql/distsqlrun.(*ProcessorBase).ProcessRowHelper at line 708
pkg/sql/distsqlrun/tablereader.go in pkg/sql/distsqlrun.(*tableReader).Next at line 260
pkg/sql/distsqlrun/base.go in pkg/sql/distsqlrun.Run at line 172
pkg/sql/distsqlrun/processors.go in pkg/sql/distsqlrun.(*ProcessorBase).Run at line 731
pkg/sql/distsqlrun/flow.go in pkg/sql/distsqlrun.(*Flow).StartSync at line 607
pkg/sql/distsql_running.go in pkg/sql.(*DistSQLPlanner).Run at line 253
pkg/sql/distsql_running.go in pkg/sql.(*DistSQLPlanner).PlanAndRun at line 758
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).execWithDistSQLEngine at line 982
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).dispatchToExecutionEngine at line 824
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).execStmtInOpenState at line 402
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).execStmt at line 96
pkg/sql/conn_executor.go in pkg/sql.(*connExecutor).run at line 1175
pkg/sql/conn_executor.go in pkg/sql.(*Server).ServeConn at line 389
pkg/sql/pgwire/conn.go in pkg/sql/pgwire.(*conn).serveImpl.func4 at line 313
Tag Value
Cockroach Release v2.1.5
Cockroach SHA: 1634c6b
Platform linux amd64
Distribution CCL
Environment v2.1.5
Command server
Go Version go1.10.7
# of CPUs 8
# of Goroutines 389
@cockroach-teamcity cockroach-teamcity added C-bug Code not up to spec/doc, specs & docs deemed correct. Solution expected to change code/behavior. O-sentry Originated from an in-the-wild panic report. labels Apr 15, 2019
@asubiotto asubiotto changed the title from "sentry: conn_executor.go:645: panic while executing 1 statements: SELECT _, _ FROM _._._ WHERE ((_ IS _) AND (_ = $1)) AND (_ = $2) ORDER BY _ DESC LIMIT $3: caused by <redacted>" to "sentry: panic due to decoding unset EncDatum" Apr 15, 2019
@asubiotto
Copy link
Contributor

Related to #36356. Keeping it open as the stacktraces are different. From a cursory look, both appear to use an ORDER BY _ DESC LIMIT _. It might be hard to do anything else if we don't have the schema, but let's attempt to repro and close if we can't.

tbg added a commit to tbg/cockroach that referenced this issue Nov 29, 2019
We could end up splitting between column families of the same row,
which is illegal. Unfortunately the KV layer has to uphold invariants
here that it doesn't quite have introspection into, but after this
commit it hopefully stops breaking them.

See cockroachdb#16344 for some
additional history.

Possibly the solution for cockroachdb#39794.
Possibly the solution for cockroachdb#36834.
Possibly the solution for cockroachdb#36356.

(Intentionally not closing the above; leaving that to the SQL folks).

Closes cockroachdb#42056 (which is the go-to for reading up on this issue).

Release note (bug fix): prevent a number of panics from the SQL layer
caused by an invalid range split. These would usually manifest with
messages mentioning encoding errors ("found null on not null column" but
also possibly various others).
tbg added a commit to tbg/cockroach that referenced this issue Dec 2, 2019
We could end up splitting between column families of the same row,
which is illegal. Unfortunately the KV layer has to uphold invariants
here that it doesn't quite have introspection into, but after this
commit it hopefully stops breaking them.

See cockroachdb#16344 for some
additional history.

Possibly the solution for cockroachdb#39794.
Possibly the solution for cockroachdb#36834.
Possibly the solution for cockroachdb#36356.

(Intentionally not closing the above; leaving that to the SQL folks).

Closes cockroachdb#42056 (which is the go-to for reading up on this issue).

Release note (bug fix): prevent a number of panics from the SQL layer
caused by an invalid range split. These would usually manifest with
messages mentioning encoding errors ("found null on not null column" but
also possibly various others).
craig bot pushed a commit that referenced this issue Dec 2, 2019
42833: storage: call EnsureSafeSplitKey during load-based splits r=bdarnell a=tbg

We could end up splitting between column families of the same row,
which is illegal. Unfortunately the KV layer has to uphold invariants
here that it doesn't quite have introspection into, but after this
commit it hopefully stops breaking them.

See #16344 for some
additional history.

Possibly the solution for #39794.
Possibly the solution for #36834.
Possibly the solution for #36356.

(Intentionally not closing the above; leaving that to the SQL folks).

Closes #42056 (which is the go-to for reading up on this issue).

Release note (bug fix): prevent a number of panics from the SQL layer
caused by an invalid range split. These would usually manifest with
messages mentioning encoding errors ("found null on not null column" but
also possibly various others).

Co-authored-by: Tobias Schottdorf <[email protected]>
tbg added a commit to tbg/cockroach that referenced this issue Dec 3, 2019
We could end up splitting between column families of the same row,
which is illegal. Unfortunately the KV layer has to uphold invariants
here that it doesn't quite have introspection into, but after this
commit it hopefully stops breaking them.

See cockroachdb#16344 for some
additional history.

Possibly the solution for cockroachdb#39794.
Possibly the solution for cockroachdb#36834.
Possibly the solution for cockroachdb#36356.

(Intentionally not closing the above; leaving that to the SQL folks).

Closes cockroachdb#42056 (which is the go-to for reading up on this issue).

Release note (bug fix): prevent a number of panics from the SQL layer
caused by an invalid range split. These would usually manifest with
messages mentioning encoding errors ("found null on not null column" but
also possibly various others).
@irfansharif
Copy link
Contributor

I realize this was left open for the SQL team to address, but this is almost certainly fixed by #42833. So I'm going to go ahead and close it (it'll still be searchable should anyone run into the same).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
C-bug Code not up to spec/doc, specs & docs deemed correct. Solution expected to change code/behavior. O-sentry Originated from an in-the-wild panic report.
Projects
None yet
Development

No branches or pull requests

3 participants