Skip to content

Commit

Permalink
workload/schemachange: add support for CREATE FUNCTION
Browse files Browse the repository at this point in the history
This commit adds support for the `CREATE FUNCTION` DDL to the RSW.

While the space for possible UDF incantations is quite large, this
commit focused on permutations that are of interest to schema changes.
That is to say, the body of the UDF does not have much, if any,
variation but will be comprised of references to tables and UDTs.

Unfortunately, the introduction of UDFs in the descriptor graph has
aggravated an issue within `DROP SCHEMA CASCADE`.

This commit contains attempts to correct said issues to no avail. It's
possible that either the DSC or LSC are missing some logic for handling
UDFs that contain references to UDTs. Prior to this commit, errors
preventing the schema from being dropped would be thrown. After it,
expected errors are not thrown appropriately despite the checks being
hand verified for correctness.

Epic: CRDB-19168
Informs: CRDB-3265
Release note: None
  • Loading branch information
chrisseto committed Dec 19, 2023
1 parent 5f76d93 commit 6e1ea5d
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 69 deletions.
83 changes: 23 additions & 60 deletions pkg/workload/schemachange/error_screening.go
Original file line number Diff line number Diff line change
Expand Up @@ -1347,66 +1347,29 @@ SELECT EXISTS(
func (og *operationGenerator) schemaContainsTypesWithCrossSchemaReferences(
ctx context.Context, tx pgx.Tx, schemaName string,
) (bool, error) {
return og.scanBool(ctx, tx, `
WITH database_id AS (
SELECT id
FROM system.namespace
WHERE "parentID" = 0
AND "parentSchemaID" = 0
AND name = current_database()
),
schema_id AS (
SELECT nsp.id
FROM system.namespace AS nsp
JOIN database_id ON "parentID" = database_id.id
AND "parentSchemaID" = 0
AND name = $1
),
descriptor_ids AS (
SELECT nsp.id
FROM system.namespace AS nsp,
schema_id,
database_id
WHERE nsp."parentID" = database_id.id
AND nsp."parentSchemaID" = schema_id.id
),
descriptors AS (
SELECT crdb_internal.pb_to_json(
'cockroach.sql.sqlbase.Descriptor',
descriptor
) AS descriptor
FROM system.descriptor AS descriptors
JOIN descriptor_ids ON descriptors.id
= descriptor_ids.id
),
types AS (
SELECT descriptor
FROM descriptors
WHERE (descriptor->'type') IS NOT NULL
),
table_references AS (
SELECT json_array_elements(
descriptor->'table'->'dependedOnBy'
) AS ref
FROM descriptors
WHERE (descriptor->'table') IS NOT NULL
),
dependent AS (
SELECT (ref->>'id')::INT8 AS id FROM table_references
),
referenced_descriptors AS (
SELECT json_array_elements_text(
descriptor->'type'->'referencingDescriptorIds'
)::INT8 AS id
FROM types
)
SELECT EXISTS(
SELECT *
FROM system.namespace
WHERE id IN (SELECT id FROM referenced_descriptors)
AND "parentSchemaID" NOT IN (SELECT id FROM schema_id)
AND id NOT IN (SELECT id FROM dependent)
);`, schemaName)
ctes := []CTE{
{"descriptors", descJSONQuery},
{"functions", functionDescsQuery},
{"types", enumDescsQuery},
{"referenced_descriptor_ids", `
SELECT json_array_elements(descriptor->'referencingDescriptorIds')::INT8 AS id FROM types WHERE schema_id = $1::REGNAMESPACE::INT8
UNION ALL
SELECT (ref->'id')::INT8 AS id FROM (SELECT json_array_elements(descriptor->'dependedOnBy') AS ref from functions WHERE schema_id = $1::REGNAMESPACE::INT8)
`},
}

_, err := Collect(ctx, og, tx, pgx.RowToMap, With(ctes, `SELECT * FROM types WHERE schema_id = $1::REGNAMESPACE::INT8`), schemaName)
if err != nil {
return false, err
}

result, err := Collect(ctx, og, tx, pgx.RowToMap, With(ctes, `
SELECT $1::REGNAMESPACE::INT8 AS this_schema_id, * FROM descriptors d
WHERE schema_id != $1::REGNAMESPACE::INT8
AND EXISTS(SELECT * FROM referenced_descriptor_ids WHERE id = d.id)
AND (NOT descriptor ? 'table')
`), schemaName)
return len(result) > 0, err
}

// enumMemberPresent determines whether val is a member of the enum.
Expand Down
113 changes: 110 additions & 3 deletions pkg/workload/schemachange/operation_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -3713,9 +3713,12 @@ func (og *operationGenerator) dropSchema(ctx context.Context, tx pgx.Tx) (*opStm
if err != nil {
return nil, err
}
crossReferences, err := og.schemaContainsTypesWithCrossSchemaReferences(ctx, tx, schemaName)
if err != nil {
return nil, err
crossReferences := false
if schemaExists {
crossReferences, err = og.schemaContainsTypesWithCrossSchemaReferences(ctx, tx, schemaName)
if err != nil {
return nil, err
}
}
stmt := makeOpStmt(OpStmtDDL)
stmt.expectedExecErrors.addAll(codesWithConditions{
Expand All @@ -3728,6 +3731,110 @@ func (og *operationGenerator) dropSchema(ctx context.Context, tx pgx.Tx) (*opStm
return stmt, nil
}

func (og *operationGenerator) createFunction(ctx context.Context, tx pgx.Tx) (*opStmt, error) {
// TODO(chrisseto): Allow referencing sequences as well. Currently, `DROP
// SEQUENCE CASCADE` will break if we allow sequences. It may also be good to
// reference sequences with next_val or something.
tables, err := Collect(ctx, og, tx, pgx.RowTo[string], `SELECT quote_ident(schema_name) || '.' || quote_ident(table_name) FROM [SHOW TABLES] WHERE type != 'sequence'`)
if err != nil {
return nil, err
}

enumQuery := With([]CTE{
{"descriptors", descJSONQuery},
{"enums", enumDescsQuery},
{"enum_members", enumMemberDescsQuery},
}, `SELECT
quote_ident(schema_id::REGNAMESPACE::TEXT) || '.' || quote_ident(name) AS name,
quote_literal(member->>'logicalRepresentation') AS value,
COALESCE(member->>'direction' = 'REMOVE', false) AS dropping
FROM enum_members
`)

enums, err := Collect(ctx, og, tx, pgx.RowToMap, enumQuery)
if err != nil {
return nil, err
}

// Roll some variables to ensure we have variance in the types of references
// that we aside from being bound by what we could make references to.
useBodyRefs := og.randIntn(2) == 0
useParamRefs := og.randIntn(2) == 0
useReturnRefs := og.randIntn(2) == 0

var droppingEnums []string
var possibleBodyReferences []string
var possibleParamReferences []string
var possibleReturnReferences []string

for i, enum := range enums {
if enum["dropping"].(bool) {
droppingEnums = append(droppingEnums, enum["name"].(string))
continue
}
possibleReturnReferences = append(possibleReturnReferences, enum["name"].(string))
possibleParamReferences = append(possibleParamReferences, fmt.Sprintf(`enum_%d %s`, i, enum["name"]))
possibleBodyReferences = append(possibleBodyReferences, fmt.Sprintf(`(%s::%s IS NULL)`, enum["value"], enum["name"]))
}

for _, table := range tables {
possibleReturnReferences = append(possibleReturnReferences, fmt.Sprintf(`SETOF %s`, table))
possibleBodyReferences = append(possibleBodyReferences, fmt.Sprintf(`((SELECT count(*) FROM %s LIMIT 0) = 0)`, table))
}

// TODO(chrisseto): There's no randomization across STRICT, VOLATILE,
// IMMUTABLE, STABLE, STRICT, and [NOT] LEAKPROOF. That's likely not relevant
// to the schema workload but may become a nice to have.
stmt, expectedCode, err := Generate[*tree.CreateRoutine](og.params.rng, og.produceError(), []GenerationCase{
// 1. Nothing special, fully self contained function.
{pgcode.SuccessfulCompletion, `CREATE FUNCTION { UniqueName } () RETURNS VOID LANGUAGE SQL AS $$ SELECT NULL $$`},
// 2. 1 or more table or type references spread across parameters, return types, or the function body.
{pgcode.SuccessfulCompletion, `CREATE FUNCTION { UniqueName } ({ ParamRefs }) RETURNS { ReturnRefs } LANGUAGE SQL AS $$ SELECT NULL WHERE { BodyRefs } $$`},
// 3. Reference a table that does not exist.
{pgcode.UndefinedTable, `CREATE FUNCTION { UniqueName } () RETURNS VOID LANGUAGE SQL AS $$ SELECT * FROM "ThisTableDoesNotExist" $$`},
// 4. Reference a UDT that does not exist.
{pgcode.UndefinedObject, `CREATE FUNCTION { UniqueName } (IN p1 "ThisTypeDoesNotExist") RETURNS VOID LANGUAGE SQL AS $$ SELECT NULL $$`},
// 5. Reference an Enum that's in the process of being dropped
{pgcode.UndefinedTable, `CREATE FUNCTION { UniqueName } (IN p1 { DroppingEnum }) RETURNS VOID LANGUAGE SQL AS $$ SELECT NULL $$`},
}, template.FuncMap{
"UniqueName": func() *tree.Name {
name := tree.Name(fmt.Sprintf("udf_%d", og.newUniqueSeqNum()))
return &name
},
"DroppingEnum": func() (string, error) {
return PickOne(og.params.rng, droppingEnums)
},
"ParamRefs": func() (string, error) {
refs, err := PickAtLeast(og.params.rng, 1, possibleParamReferences)
if useParamRefs && err == nil {
return strings.Join(refs, ", "), nil
}
return "", nil //nolint:returnerrcheck
},
"ReturnRefs": func() (string, error) {
ref, err := PickOne(og.params.rng, possibleReturnReferences)
if useReturnRefs && err == nil {
return ref, nil
}
return "VOID", nil //nolint:returnerrcheck
},
"BodyRefs": func() (string, error) {
refs, err := PickAtLeast(og.params.rng, 1, possibleBodyReferences)
if useBodyRefs && err == nil {
return strings.Join(refs, " AND "), nil
}
return "TRUE", nil //nolint:returnerrcheck
},
})

if err != nil {
return nil, err
}
return newOpStmt(stmt, codesWithConditions{
{expectedCode, true},
}), nil
}

func (og *operationGenerator) selectStmt(ctx context.Context, tx pgx.Tx) (stmt *opStmt, err error) {
const maxTablesForSelect = 3
const maxColumnsForSelect = 16
Expand Down
3 changes: 3 additions & 0 deletions pkg/workload/schemachange/optype.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ const (
createTable // CREATE TABLE <table> <def>
createTableAs // CREATE TABLE <table> AS <def>
createView // CREATE VIEW <view> AS <def>
createFunction // CREATE FUNCTION <function> ...

// DROP ...

Expand Down Expand Up @@ -225,6 +226,7 @@ var opFuncs = []func(*operationGenerator, context.Context, pgx.Tx) (*opStmt, err
alterTableSetColumnDefault: (*operationGenerator).setColumnDefault,
alterTableSetColumnNotNull: (*operationGenerator).setColumnNotNull,
alterTypeDropValue: (*operationGenerator).alterTypeDropValue,
createFunction: (*operationGenerator).createFunction,
createIndex: (*operationGenerator).createIndex,
createSchema: (*operationGenerator).createSchema,
createSequence: (*operationGenerator).createSequence,
Expand Down Expand Up @@ -267,6 +269,7 @@ var opWeights = []int{
createView: 1,
createTypeEnum: 1,
createSchema: 1,
createFunction: 1,
alterTableDropColumn: 0,
alterTableDropColumnDefault: 1,
alterTableDropNotNull: 1,
Expand Down
13 changes: 8 additions & 5 deletions pkg/workload/schemachange/optype_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 28 additions & 1 deletion pkg/workload/schemachange/query_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,34 @@ const (
// descJSONQuery returns the JSONified version of all descriptors in the
// current database joined with system.namespace.
//
// NOTE:descJSONQuery injects "virtual" system.namespace entries for function
// descriptors as they do not have "proper" namespace entries.
//
// id::int | schema_id::int | name::text | descriptor::json
descJSONQuery = `SELECT
descriptor.id,
"parentSchemaID" AS schema_id,
namespace.name AS name,
crdb_internal.pb_to_json('desc', descriptor) AS descriptor
FROM system.descriptor
JOIN system.namespace ON namespace.id = descriptor.id
JOIN (
SELECT * FROM system.namespace
UNION
SELECT
"parentID",
"parentSchemaID",
(json_each).@1 AS name,
(json_array_elements((json_each).@2->'signatures')->'id')::INT8 AS id
FROM (
SELECT
ns."parentID",
ns.id AS "parentSchemaID",
json_each(crdb_internal.pb_to_json('desc', descriptor)->'schema'->'functions')
FROM system.descriptor
JOIN system.namespace ns ON ns.id = descriptor.id
WHERE crdb_internal.pb_to_json('desc', descriptor) ? 'schema'
)
) namespace ON namespace.id = descriptor.id
WHERE "parentID" = (SELECT id FROM system.namespace WHERE name = current_database() AND "parentID" = 0)
`

Expand All @@ -49,6 +69,13 @@ const (
// id::int | schema_id::int | name::text | descriptor::json | member::json
enumMemberDescsQuery = `SELECT *, jsonb_array_elements(descriptor->'enumMembers') AS member FROM enums`

// functionDescsQuery returns the JSONified version of all function descriptors in the current database.
//
// [descJSONQuery] must be bound to the name "descriptors".
//
// id::int | schema_id::int | name::text | descriptor::json
functionDescsQuery = `SELECT id, schema_id, name, descriptor->'function' AS descriptor FROM descriptors WHERE descriptor ? 'function'`

regionsFromClusterQuery = `SELECT * FROM [SHOW REGIONS FROM CLUSTER]`
)

Expand Down

0 comments on commit 6e1ea5d

Please sign in to comment.