Skip to content

Commit

Permalink
sql,jobs: short-term fix for UndefinedColumn job_type error
Browse files Browse the repository at this point in the history
In #106762 we noticed that if
a query is executed with an AS OF SYSTEM TIME clause that picks a transaction
timestamp before the job_type migration, then parts of the jobs
infrastructure will attempt to query the job_type column even though it
doesn't exist at the transaction's timestamp.

As a short term fix, when we encounter an `UndefinedColumn` error for
the `job_type` column in `crdb_internal.jobs` we
generate a synthetic retryable error so that the txn is pushed to a
higher timestamp at which the upgrade will have completed and the
`job_type` column will be visible. The longer term fix is being tracked
in #106764.

We are intentionally approaching this issue with a whack-a-mole
approach to stabilize the tests the are running into this issue. We
think time is better spent designing and investing in the longer term
solution that will be tracked in #106764.

Fixes: #107169
Informs: #106762
Release note: None
  • Loading branch information
adityamaru committed Jul 25, 2023
1 parent 451d761 commit ac55715
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 3 deletions.
1 change: 1 addition & 0 deletions pkg/jobs/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ go_library(
"//pkg/sql/catalog/descpb",
"//pkg/sql/catalog/descs",
"//pkg/sql/isql",
"//pkg/sql/pgwire/pgcode",
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/protoreflect",
"//pkg/sql/sem/builtins",
Expand Down
32 changes: 32 additions & 0 deletions pkg/jobs/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/sql/isql"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/errors"
)
Expand Down Expand Up @@ -153,3 +155,33 @@ func JobExists(
}
return row != nil, nil
}

// IsJobTypeColumnDoesNotExistError returns true if the error is of the form
// `column "job_type" does not exist`.
func isJobTypeColumnDoesNotExistError(err error) bool {
return pgerror.GetPGCode(err) == pgcode.UndefinedColumn &&
strings.Contains(err.Error(), "column \"job_type\" does not exist")
}

// MaybeGenerateForcedRetryableError returns a
// TransactionRetryWithProtoRefreshError that will cause the txn to be retried
// if the error is because of an undefined job_type column.
//
// In https://github.com/cockroachdb/cockroach/issues/106762 we noticed that if
// a query is executed with an AS OF SYSTEM TIME clause that picks a transaction
// timestamp before the job_type migration, then parts of the jobs
// infrastructure will attempt to query the job_type column even though it
// doesn't exist at the transaction's timestamp.
//
// As a short term fix, when we encounter an `UndefinedColumn` error we
// generate a synthetic retryable error so that the txn is pushed to a
// higher timestamp at which the upgrade will have completed and the
// `job_type` column will be visible. The longer term fix is being tracked
// in https://github.com/cockroachdb/cockroach/issues/106764.
func MaybeGenerateForcedRetryableError(ctx context.Context, txn *kv.Txn, err error) error {
if err != nil && isJobTypeColumnDoesNotExistError(err) {
return txn.GenerateForcedRetryableError(ctx, "synthetic error "+
"to push timestamp to after the `job_type` upgrade has run")
}
return err
}
4 changes: 2 additions & 2 deletions pkg/sql/crdb_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,7 @@ func populateSystemJobsTableRows(
params...,
)
if err != nil {
return matched, err
return matched, jobs.MaybeGenerateForcedRetryableError(ctx, p.Txn(), err)
}

cleanup := func(ctx context.Context) {
Expand All @@ -1082,7 +1082,7 @@ func populateSystemJobsTableRows(
for {
hasNext, err := it.Next(ctx)
if !hasNext || err != nil {
return matched, err
return matched, jobs.MaybeGenerateForcedRetryableError(ctx, p.Txn(), err)
}

currentRow := it.Cur()
Expand Down
1 change: 0 additions & 1 deletion pkg/upgrade/upgrades/json_forward_indexes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
)

func TestJSONForwardingIndexes(t *testing.T) {
skip.WithIssue(t, 107169, "flaky test")
var err error
skip.UnderStressRace(t)
defer leaktest.AfterTest(t)()
Expand Down

0 comments on commit ac55715

Please sign in to comment.