diff --git a/pkg/ccl/testccl/workload/schemachange/BUILD.bazel b/pkg/ccl/testccl/workload/schemachange/BUILD.bazel index b5e6e0246d16..8341b8995be8 100644 --- a/pkg/ccl/testccl/workload/schemachange/BUILD.bazel +++ b/pkg/ccl/testccl/workload/schemachange/BUILD.bazel @@ -3,11 +3,12 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") go_test( name = "schemachange_test", + size = "large", srcs = [ "main_test.go", "schema_change_external_test.go", ], - args = ["-test.timeout=295s"], + args = ["-test.timeout=895s"], data = [ "//c-deps:libgeos", ], diff --git a/pkg/ccl/testccl/workload/schemachange/schema_change_external_test.go b/pkg/ccl/testccl/workload/schemachange/schema_change_external_test.go index 9c8d1676cc2e..754dbce520b8 100644 --- a/pkg/ccl/testccl/workload/schemachange/schema_change_external_test.go +++ b/pkg/ccl/testccl/workload/schemachange/schema_change_external_test.go @@ -34,7 +34,7 @@ import ( func TestWorkload(t *testing.T) { defer leaktest.AfterTest(t)() defer utilccl.TestingEnableEnterprise()() - skip.WithIssue(t, 78478) + skip.UnderRace(t, "test connections can be too slow under race option.") dir := t.TempDir() ctx := context.Background() @@ -90,7 +90,7 @@ func TestWorkload(t *testing.T) { ql, err := wl.Ops(ctx, []string{pgURL.String()}, reg) require.NoError(t, err) - const N = 100 + const N = 800 workerFn := func(ctx context.Context, fn func(ctx context.Context) error) func() error { return func() error { for i := 0; i < N; i++ { diff --git a/pkg/cmd/roachtest/tests/schemachange_random_load.go b/pkg/cmd/roachtest/tests/schemachange_random_load.go index e1171ff6531a..bced74144660 100644 --- a/pkg/cmd/roachtest/tests/schemachange_random_load.go +++ b/pkg/cmd/roachtest/tests/schemachange_random_load.go @@ -45,9 +45,6 @@ func registerSchemaChangeRandomLoad(r registry.Registry) { spec.Geo(), spec.Zones(geoZonesStr), ), - // This is set while development is still happening on the workload and we - // fix (or bypass) minor schema change bugs that are discovered. - NonReleaseBlocker: true, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { maxOps := 5000 concurrency := 20 diff --git a/pkg/sql/resolve_oid.go b/pkg/sql/resolve_oid.go index da9b820aad49..cdba7448764f 100644 --- a/pkg/sql/resolve_oid.go +++ b/pkg/sql/resolve_oid.go @@ -15,6 +15,7 @@ import ( "fmt" "github.com/cockroachdb/cockroach/pkg/kv" + "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" @@ -77,7 +78,12 @@ func resolveOID( results, err := ie.QueryRowEx(ctx, "queryOid", txn, sessiondata.NoSessionDataOverride, q, toResolve) if err != nil { - if errors.HasType(err, (*tree.MultipleResultsError)(nil)) { + if catalog.HasInactiveDescriptorError(err) { + // Descriptor is either dropped or offline, so + // the OID does not exist. + return nil, true, pgerror.Newf(info.errType, + "%s %s does not exist", info.objName, toResolve) + } else if errors.HasType(err, (*tree.MultipleResultsError)(nil)) { return nil, false, pgerror.Newf(pgcode.AmbiguousAlias, "more than one %s named %s", info.objName, toResolve) } diff --git a/pkg/workload/schemachange/error_screening.go b/pkg/workload/schemachange/error_screening.go index 6292014bee12..4d5ad403700d 100644 --- a/pkg/workload/schemachange/error_screening.go +++ b/pkg/workload/schemachange/error_screening.go @@ -560,14 +560,20 @@ SELECT count(*) > 0 return nil } +func getValidGenerationErrors() errorCodeSet { + return errorCodeSet{ + pgcode.NumericValueOutOfRange: true, + pgcode.FloatingPointException: true, + pgcode.InvalidTextRepresentation: true, + } +} + // isValidGenerationError these codes can be observed when evaluating values // for generated expressions. These are errors are not ignored, but added into // the expected set of errors. func isValidGenerationError(code string) bool { pgCode := pgcode.MakeCode(code) - return pgCode == pgcode.NumericValueOutOfRange || - pgCode == pgcode.FloatingPointException || - pgCode == pgcode.InvalidTextRepresentation + return getValidGenerationErrors().contains(pgCode) } // validateGeneratedExpressionsForInsert goes through generated expressions and diff --git a/pkg/workload/schemachange/operation_generator.go b/pkg/workload/schemachange/operation_generator.go index f4ca24d89ad6..c447fee36163 100644 --- a/pkg/workload/schemachange/operation_generator.go +++ b/pkg/workload/schemachange/operation_generator.go @@ -54,7 +54,6 @@ type operationGeneratorParams struct { // The OperationBuilder has the sole responsibility of generating ops type operationGenerator struct { params *operationGeneratorParams - potentialExecErrors errorCodeSet expectedCommitErrors errorCodeSet potentialCommitErrors errorCodeSet @@ -103,7 +102,6 @@ func makeOperationGenerator(params *operationGeneratorParams) *operationGenerato return &operationGenerator{ params: params, expectedCommitErrors: makeExpectedErrorSet(), - potentialExecErrors: makeExpectedErrorSet(), potentialCommitErrors: makeExpectedErrorSet(), candidateExpectedCommitErrors: makeExpectedErrorSet(), } @@ -112,7 +110,6 @@ func makeOperationGenerator(params *operationGeneratorParams) *operationGenerato // Reset internal state used per operation within a transaction func (og *operationGenerator) resetOpState() { og.candidateExpectedCommitErrors.reset() - og.potentialExecErrors.reset() og.opGenLog = strings.Builder{} } @@ -881,7 +878,7 @@ func (og *operationGenerator) addForeignKeyConstraint( // perfectly if an error is expected. We can confirm post transaction with a time // travel query. _ = rowsSatisfyConstraint - og.potentialExecErrors.add(pgcode.ForeignKeyViolation) + stmt.potentialExecErrors.add(pgcode.ForeignKeyViolation) og.potentialCommitErrors.add(pgcode.ForeignKeyViolation) // It's possible for the table to be dropped concurrently, while we are running @@ -1338,6 +1335,14 @@ func (og *operationGenerator) createTableAs(ctx context.Context, tx pgx.Tx) (*op {code: pgcode.DuplicateColumn, condition: duplicateColumns}, }.add(opStmt.expectedExecErrors) + // Confirm the select itself doesn't run into any column generation errors, + // by executing it independently first until we add validation when adding + // generated columns. See issue: #81698?, which will allow us to remove this + // logic in the future. + if opStmt.expectedExecErrors.empty() { + opStmt.potentialExecErrors.merge(getValidGenerationErrors()) + } + opStmt.sql = fmt.Sprintf(`CREATE TABLE %s AS %s FETCH FIRST %d ROWS ONLY`, destTableName, selectStatement.String(), MaxRowsToConsume) return opStmt, nil @@ -2404,7 +2409,7 @@ func (og *operationGenerator) insertRow(ctx context.Context, tx pgx.Tx) (stmt *o return nil, err } // We may have errors that are possible, but not guaranteed. - potentialErrors.add(og.potentialExecErrors) + potentialErrors.add(stmt.potentialExecErrors) if invalidInsert { generatedErrors.add(stmt.expectedExecErrors) // We will be pessimistic and assume that other column related errors can diff --git a/pkg/workload/schemachange/schemachange.go b/pkg/workload/schemachange/schemachange.go index 93f3d1240e3c..36d65fa6f979 100644 --- a/pkg/workload/schemachange/schemachange.go +++ b/pkg/workload/schemachange/schemachange.go @@ -313,17 +313,12 @@ func (w *schemaChangeWorker) recordInHist(elapsed time.Duration, bin histBin) { func (w *schemaChangeWorker) getErrorState() string { return fmt.Sprintf("Dumping state before death:\n"+ - "Expected errors: %s"+ - "Potential exec errors: %s"+ "Expected commit errors: %s"+ "Potential commit errors: %s"+ "==========================="+ "Executed queries for generating errors: %s"+ "==========================="+ "Previous statements %s", - "", - // w.opGen.expectedExecErrors.String(), - w.opGen.potentialExecErrors.String(), w.opGen.expectedCommitErrors.String(), w.opGen.potentialCommitErrors.String(), w.opGen.GetOpGenLog(), diff --git a/pkg/workload/schemachange/watch_dog.go b/pkg/workload/schemachange/watch_dog.go index da021dc5d3f6..18a983d5f929 100644 --- a/pkg/workload/schemachange/watch_dog.go +++ b/pkg/workload/schemachange/watch_dog.go @@ -93,7 +93,15 @@ func (w *schemaChangeWatchDog) watchLoop(ctx context.Context) { close(responseChannel) return case <-ctx.Done(): - panic("dumping stacks, we failed to terminate threads on time.") + // Give the connections a small amount of time to clean up, if they fail + // to do so, we will dump stacks. + select { + case <-w.cmdChannel: + return + case <-time.After(time.Second * 4): + panic("dumping stacks, we failed to terminate threads on time.") + + } case <-time.After(time.Second): // If the connection is making progress, the watch dog timer can be reset // again.