workload/schemachange: add support for CREATE FUNCTION

This commit adds support for the `CREATE FUNCTION` DDL to the RSW. While the space for possible UDF incantations is quite large, this commit focused on permutations that are of interest to schema changes. That is to say, the body of the UDF does not have much, if any, variation but will be comprised of references to tables and UDTs. Unfortunately, the introduction of UDFs in the descriptor graph has aggravated an issue within `DROP SCHEMA CASCADE`. This commit contains attempts to correct said issues to no avail. It's possible that either the DSC or LSC are missing some logic for handling UDFs that contain references to UDTs. Prior to this commit, errors preventing the schema from being dropped would be thrown. After it, expected errors are not thrown appropriately despite the checks being hand verified for correctness. Epic: CRDB-19168 Informs: CRDB-3265 Release note: None
cockroachdb · Dec 19, 2023 · 6e1ea5d · 6e1ea5d
1 parent 5f76d93
commit 6e1ea5d
Show file tree

Hide file tree

Showing 5 changed files with 172 additions and 69 deletions.
diff --git a/pkg/workload/schemachange/error_screening.go b/pkg/workload/schemachange/error_screening.go
@@ -1347,66 +1347,29 @@ SELECT EXISTS(
 func (og *operationGenerator) schemaContainsTypesWithCrossSchemaReferences(
 	ctx context.Context, tx pgx.Tx, schemaName string,
 ) (bool, error) {
-	return og.scanBool(ctx, tx, `
-  WITH database_id AS (
-                    SELECT id
-                      FROM system.namespace
-                     WHERE "parentID" = 0
-                       AND "parentSchemaID" = 0
-                       AND name = current_database()
-                   ),
-       schema_id AS (
-                    SELECT nsp.id
-                      FROM system.namespace AS nsp
-                      JOIN database_id ON "parentID" = database_id.id
-                                      AND "parentSchemaID" = 0
-                                      AND name = $1
-                 ),
-       descriptor_ids AS (
-                        SELECT nsp.id
-                          FROM system.namespace AS nsp,
-                               schema_id,
-                               database_id
-                         WHERE nsp."parentID" = database_id.id
-                           AND nsp."parentSchemaID" = schema_id.id
-                      ),
-       descriptors AS (
-                    SELECT crdb_internal.pb_to_json(
-                            'cockroach.sql.sqlbase.Descriptor',
-                            descriptor
-                           ) AS descriptor
-                      FROM system.descriptor AS descriptors
-                      JOIN descriptor_ids ON descriptors.id
-                                             = descriptor_ids.id
-                   ),
-       types AS (
-                SELECT descriptor
-                  FROM descriptors
-                 WHERE (descriptor->'type') IS NOT NULL
-             ),
-       table_references AS (
-                            SELECT json_array_elements(
-                                    descriptor->'table'->'dependedOnBy'
-                                   ) AS ref
-                              FROM descriptors
-                             WHERE (descriptor->'table') IS NOT NULL
-                        ),
-       dependent AS (
-                    SELECT (ref->>'id')::INT8 AS id FROM table_references
-                 ),
-       referenced_descriptors AS (
-                                SELECT json_array_elements_text(
-                                        descriptor->'type'->'referencingDescriptorIds'
-                                       )::INT8 AS id
-                                  FROM types
-                              )
-SELECT EXISTS(
-        SELECT *
-          FROM system.namespace
-         WHERE id IN (SELECT id FROM referenced_descriptors)
-           AND "parentSchemaID" NOT IN (SELECT id FROM schema_id)
-           AND id NOT IN (SELECT id FROM dependent)
-       );`, schemaName)
+	ctes := []CTE{
+		{"descriptors", descJSONQuery},
+		{"functions", functionDescsQuery},
+		{"types", enumDescsQuery},
+		{"referenced_descriptor_ids", `
+				SELECT json_array_elements(descriptor->'referencingDescriptorIds')::INT8 AS id FROM types WHERE schema_id = $1::REGNAMESPACE::INT8
+			UNION ALL
+				SELECT (ref->'id')::INT8 AS id FROM (SELECT json_array_elements(descriptor->'dependedOnBy') AS ref from functions WHERE schema_id = $1::REGNAMESPACE::INT8)
+		`},
+	}
+
+	_, err := Collect(ctx, og, tx, pgx.RowToMap, With(ctes, `SELECT * FROM types WHERE schema_id = $1::REGNAMESPACE::INT8`), schemaName)
+	if err != nil {
+		return false, err
+	}
+
+	result, err := Collect(ctx, og, tx, pgx.RowToMap, With(ctes, `
+		SELECT $1::REGNAMESPACE::INT8 AS this_schema_id, * FROM descriptors d
+		WHERE schema_id != $1::REGNAMESPACE::INT8
+		AND EXISTS(SELECT * FROM referenced_descriptor_ids WHERE id = d.id)
+		AND (NOT descriptor ? 'table')
+	`), schemaName)
+	return len(result) > 0, err
 }
 
 // enumMemberPresent determines whether val is a member of the enum.

diff --git a/pkg/workload/schemachange/operation_generator.go b/pkg/workload/schemachange/operation_generator.go
@@ -3713,9 +3713,12 @@ func (og *operationGenerator) dropSchema(ctx context.Context, tx pgx.Tx) (*opStm
 	if err != nil {
 		return nil, err
 	}
-	crossReferences, err := og.schemaContainsTypesWithCrossSchemaReferences(ctx, tx, schemaName)
-	if err != nil {
-		return nil, err
+	crossReferences := false
+	if schemaExists {
+		crossReferences, err = og.schemaContainsTypesWithCrossSchemaReferences(ctx, tx, schemaName)
+		if err != nil {
+			return nil, err
+		}
 	}
 	stmt := makeOpStmt(OpStmtDDL)
 	stmt.expectedExecErrors.addAll(codesWithConditions{
@@ -3728,6 +3731,110 @@ func (og *operationGenerator) dropSchema(ctx context.Context, tx pgx.Tx) (*opStm
 	return stmt, nil
 }
 
+func (og *operationGenerator) createFunction(ctx context.Context, tx pgx.Tx) (*opStmt, error) {
+	// TODO(chrisseto): Allow referencing sequences as well. Currently, `DROP
+	// SEQUENCE CASCADE` will break if we allow sequences. It may also be good to
+	// reference sequences with next_val or something.
+	tables, err := Collect(ctx, og, tx, pgx.RowTo[string], `SELECT quote_ident(schema_name) || '.' || quote_ident(table_name) FROM [SHOW TABLES] WHERE type != 'sequence'`)
+	if err != nil {
+		return nil, err
+	}
+
+	enumQuery := With([]CTE{
+		{"descriptors", descJSONQuery},
+		{"enums", enumDescsQuery},
+		{"enum_members", enumMemberDescsQuery},
+	}, `SELECT
+				quote_ident(schema_id::REGNAMESPACE::TEXT) || '.' || quote_ident(name) AS name,
+				quote_literal(member->>'logicalRepresentation') AS value,
+				COALESCE(member->>'direction' = 'REMOVE', false) AS dropping
+			FROM enum_members
+		`)
+
+	enums, err := Collect(ctx, og, tx, pgx.RowToMap, enumQuery)
+	if err != nil {
+		return nil, err
+	}
+
+	// Roll some variables to ensure we have variance in the types of references
+	// that we aside from being bound by what we could make references to.
+	useBodyRefs := og.randIntn(2) == 0
+	useParamRefs := og.randIntn(2) == 0
+	useReturnRefs := og.randIntn(2) == 0
+
+	var droppingEnums []string
+	var possibleBodyReferences []string
+	var possibleParamReferences []string
+	var possibleReturnReferences []string
+
+	for i, enum := range enums {
+		if enum["dropping"].(bool) {
+			droppingEnums = append(droppingEnums, enum["name"].(string))
+			continue
+		}
+		possibleReturnReferences = append(possibleReturnReferences, enum["name"].(string))
+		possibleParamReferences = append(possibleParamReferences, fmt.Sprintf(`enum_%d %s`, i, enum["name"]))
+		possibleBodyReferences = append(possibleBodyReferences, fmt.Sprintf(`(%s::%s IS NULL)`, enum["value"], enum["name"]))
+	}
+
+	for _, table := range tables {
+		possibleReturnReferences = append(possibleReturnReferences, fmt.Sprintf(`SETOF %s`, table))
+		possibleBodyReferences = append(possibleBodyReferences, fmt.Sprintf(`((SELECT count(*) FROM %s LIMIT 0) = 0)`, table))
+	}
+
+	// TODO(chrisseto): There's no randomization across STRICT, VOLATILE,
+	// IMMUTABLE, STABLE, STRICT, and [NOT] LEAKPROOF. That's likely not relevant
+	// to the schema workload but may become a nice to have.
+	stmt, expectedCode, err := Generate[*tree.CreateRoutine](og.params.rng, og.produceError(), []GenerationCase{
+		// 1. Nothing special, fully self contained function.
+		{pgcode.SuccessfulCompletion, `CREATE FUNCTION { UniqueName } () RETURNS VOID LANGUAGE SQL AS $$ SELECT NULL $$`},
+		// 2. 1 or more table or type references spread across parameters, return types, or the function body.
+		{pgcode.SuccessfulCompletion, `CREATE FUNCTION { UniqueName } ({ ParamRefs }) RETURNS { ReturnRefs } LANGUAGE SQL AS $$ SELECT NULL WHERE { BodyRefs } $$`},
+		// 3. Reference a table that does not exist.
+		{pgcode.UndefinedTable, `CREATE FUNCTION { UniqueName } () RETURNS VOID LANGUAGE SQL AS $$ SELECT * FROM "ThisTableDoesNotExist" $$`},
+		// 4. Reference a UDT that does not exist.
+		{pgcode.UndefinedObject, `CREATE FUNCTION { UniqueName } (IN p1 "ThisTypeDoesNotExist") RETURNS VOID LANGUAGE SQL AS $$ SELECT NULL $$`},
+		// 5. Reference an Enum that's in the process of being dropped
+		{pgcode.UndefinedTable, `CREATE FUNCTION { UniqueName } (IN p1 { DroppingEnum }) RETURNS VOID LANGUAGE SQL AS $$ SELECT NULL $$`},
+	}, template.FuncMap{
+		"UniqueName": func() *tree.Name {
+			name := tree.Name(fmt.Sprintf("udf_%d", og.newUniqueSeqNum()))
+			return &name
+		},
+		"DroppingEnum": func() (string, error) {
+			return PickOne(og.params.rng, droppingEnums)
+		},
+		"ParamRefs": func() (string, error) {
+			refs, err := PickAtLeast(og.params.rng, 1, possibleParamReferences)
+			if useParamRefs && err == nil {
+				return strings.Join(refs, ", "), nil
+			}
+			return "", nil //nolint:returnerrcheck
+		},
+		"ReturnRefs": func() (string, error) {
+			ref, err := PickOne(og.params.rng, possibleReturnReferences)
+			if useReturnRefs && err == nil {
+				return ref, nil
+			}
+			return "VOID", nil //nolint:returnerrcheck
+		},
+		"BodyRefs": func() (string, error) {
+			refs, err := PickAtLeast(og.params.rng, 1, possibleBodyReferences)
+			if useBodyRefs && err == nil {
+				return strings.Join(refs, " AND "), nil
+			}
+			return "TRUE", nil //nolint:returnerrcheck
+		},
+	})
+
+	if err != nil {
+		return nil, err
+	}
+	return newOpStmt(stmt, codesWithConditions{
+		{expectedCode, true},
+	}), nil
+}
+
 func (og *operationGenerator) selectStmt(ctx context.Context, tx pgx.Tx) (stmt *opStmt, err error) {
 	const maxTablesForSelect = 3
 	const maxColumnsForSelect = 16

diff --git a/pkg/workload/schemachange/optype.go b/pkg/workload/schemachange/optype.go
@@ -120,6 +120,7 @@ const (
 	createTable    // CREATE TABLE <table> <def>
 	createTableAs  // CREATE TABLE <table> AS <def>
 	createView     // CREATE VIEW <view> AS <def>
+	createFunction // CREATE FUNCTION <function> ...
 
 	// DROP ...
 
@@ -225,6 +226,7 @@ var opFuncs = []func(*operationGenerator, context.Context, pgx.Tx) (*opStmt, err
 	alterTableSetColumnDefault:        (*operationGenerator).setColumnDefault,
 	alterTableSetColumnNotNull:        (*operationGenerator).setColumnNotNull,
 	alterTypeDropValue:                (*operationGenerator).alterTypeDropValue,
+	createFunction:                    (*operationGenerator).createFunction,
 	createIndex:                       (*operationGenerator).createIndex,
 	createSchema:                      (*operationGenerator).createSchema,
 	createSequence:                    (*operationGenerator).createSequence,
@@ -267,6 +269,7 @@ var opWeights = []int{
 	createView:                        1,
 	createTypeEnum:                    1,
 	createSchema:                      1,
+	createFunction:                    1,
 	alterTableDropColumn:              0,
 	alterTableDropColumnDefault:       1,
 	alterTableDropNotNull:             1,

diff --git a/pkg/workload/schemachange/optype_string.go b/pkg/workload/schemachange/optype_string.go
diff --git a/pkg/workload/schemachange/query_util.go b/pkg/workload/schemachange/query_util.go
@@ -22,14 +22,34 @@ const (
 	// descJSONQuery returns the JSONified version of all descriptors in the
 	// current database joined with system.namespace.
 	//
+	// NOTE:descJSONQuery injects "virtual" system.namespace entries for function
+	// descriptors as they do not have "proper" namespace entries.
+	//
 	// id::int | schema_id::int | name::text | descriptor::json
 	descJSONQuery = `SELECT
 		descriptor.id,
 		"parentSchemaID" AS schema_id,
 		namespace.name AS name,
 		crdb_internal.pb_to_json('desc', descriptor) AS descriptor
 	FROM system.descriptor
-	JOIN system.namespace ON namespace.id = descriptor.id
+	JOIN (
+		SELECT * FROM system.namespace
+			UNION
+		SELECT
+			"parentID",
+			"parentSchemaID",
+			(json_each).@1 AS name,
+			(json_array_elements((json_each).@2->'signatures')->'id')::INT8 AS id
+		FROM (
+			SELECT
+				ns."parentID",
+				ns.id AS "parentSchemaID",
+				json_each(crdb_internal.pb_to_json('desc', descriptor)->'schema'->'functions')
+			FROM system.descriptor
+			JOIN system.namespace ns ON ns.id = descriptor.id
+			WHERE crdb_internal.pb_to_json('desc', descriptor) ? 'schema'
+		)
+	) namespace ON namespace.id = descriptor.id
 	WHERE "parentID" = (SELECT id FROM system.namespace WHERE name = current_database() AND "parentID" = 0)
 	`
 
@@ -49,6 +69,13 @@ const (
 	// id::int | schema_id::int | name::text | descriptor::json | member::json
 	enumMemberDescsQuery = `SELECT *, jsonb_array_elements(descriptor->'enumMembers') AS member FROM enums`
 
+	// functionDescsQuery returns the JSONified version of all function descriptors in the current database.
+	//
+	// [descJSONQuery] must be bound to the name "descriptors".
+	//
+	// id::int | schema_id::int | name::text | descriptor::json
+	functionDescsQuery = `SELECT id, schema_id, name, descriptor->'function' AS descriptor FROM descriptors WHERE descriptor ? 'function'`
+
 	regionsFromClusterQuery = `SELECT * FROM [SHOW REGIONS FROM CLUSTER]`
 )