diff --git a/pkg/jobs/BUILD.bazel b/pkg/jobs/BUILD.bazel index 9b288826a91e..08c053692117 100644 --- a/pkg/jobs/BUILD.bazel +++ b/pkg/jobs/BUILD.bazel @@ -49,6 +49,7 @@ go_library( "//pkg/sql/catalog/descpb", "//pkg/sql/catalog/descs", "//pkg/sql/isql", + "//pkg/sql/pgwire/pgcode", "//pkg/sql/pgwire/pgerror", "//pkg/sql/protoreflect", "//pkg/sql/sem/builtins", diff --git a/pkg/jobs/utils.go b/pkg/jobs/utils.go index 9ba2145fb6ea..17c39fee9724 100644 --- a/pkg/jobs/utils.go +++ b/pkg/jobs/utils.go @@ -19,6 +19,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/sql/isql" + "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" + "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/errors" ) @@ -153,3 +155,33 @@ func JobExists( } return row != nil, nil } + +// IsJobTypeColumnDoesNotExistError returns true if the error is of the form +// `column "job_type" does not exist`. +func isJobTypeColumnDoesNotExistError(err error) bool { + return pgerror.GetPGCode(err) == pgcode.UndefinedColumn && + strings.Contains(err.Error(), "column \"job_type\" does not exist") +} + +// MaybeGenerateForcedRetryableError returns a +// TransactionRetryWithProtoRefreshError that will cause the txn to be retried +// if the error is because of an undefined job_type column. +// +// In https://github.com/cockroachdb/cockroach/issues/106762 we noticed that if +// a query is executed with an AS OF SYSTEM TIME clause that picks a transaction +// timestamp before the job_type migration, then parts of the jobs +// infrastructure will attempt to query the job_type column even though it +// doesn't exist at the transaction's timestamp. +// +// As a short term fix, when we encounter an `UndefinedColumn` error we +// generate a synthetic retryable error so that the txn is pushed to a +// higher timestamp at which the upgrade will have completed and the +// `job_type` column will be visible. The longer term fix is being tracked +// in https://github.com/cockroachdb/cockroach/issues/106764. +func MaybeGenerateForcedRetryableError(ctx context.Context, txn *kv.Txn, err error) error { + if err != nil && isJobTypeColumnDoesNotExistError(err) { + return txn.GenerateForcedRetryableError(ctx, "synthetic error "+ + "to push timestamp to after the `job_type` upgrade has run") + } + return err +} diff --git a/pkg/sql/crdb_internal.go b/pkg/sql/crdb_internal.go index cc4cb654b2d3..51b7cd1c5679 100644 --- a/pkg/sql/crdb_internal.go +++ b/pkg/sql/crdb_internal.go @@ -1069,7 +1069,7 @@ func populateSystemJobsTableRows( params..., ) if err != nil { - return matched, err + return matched, jobs.MaybeGenerateForcedRetryableError(ctx, p.Txn(), err) } cleanup := func(ctx context.Context) { @@ -1082,7 +1082,7 @@ func populateSystemJobsTableRows( for { hasNext, err := it.Next(ctx) if !hasNext || err != nil { - return matched, err + return matched, jobs.MaybeGenerateForcedRetryableError(ctx, p.Txn(), err) } currentRow := it.Cur() diff --git a/pkg/upgrade/upgrades/json_forward_indexes_test.go b/pkg/upgrade/upgrades/json_forward_indexes_test.go index e3e51d9eb5ee..a3ceddc965fe 100644 --- a/pkg/upgrade/upgrades/json_forward_indexes_test.go +++ b/pkg/upgrade/upgrades/json_forward_indexes_test.go @@ -25,7 +25,6 @@ import ( ) func TestJSONForwardingIndexes(t *testing.T) { - skip.WithIssue(t, 107169, "flaky test") var err error skip.UnderStressRace(t) defer leaktest.AfterTest(t)()