From 2712ba5f21db81d78be2b3da0da520df533c5533 Mon Sep 17 00:00:00 2001 From: Jordan Lewis Date: Fri, 26 Feb 2021 18:19:12 -0400 Subject: [PATCH] sql,tree: make ::REGPROC cast efficient Previously, the ::REGPROC cast (which converts an OID of a builtin function to its name or vice versa) was implemented extremely inefficiently: via a query from the internal executor that ultimately has to re-materialize every builtin into memory and filter. This can lead to pathologic quadratic behavior for some common ORM queries. Now, both the oid-to-name and name-to-oid directions are made into constant-time lookups by simply preparing a map of the oid to name and vice versa at startup time. Release note (performance improvement): introspection queries that use casts into the REGPROC pseudo-type are made much more efficient: they're now implemented as a constant-time lookup instead of an internal query. Release justification: low risk, high benefit changes to existing functionality --- .../ddl_analysis/orm_queries_bench_test.go | 55 +++++++++++++++ .../testdata/benchmark_expectations | 1 + .../logictest/testdata/logic_test/pg_catalog | 3 - pkg/sql/pg_catalog.go | 18 +++++ pkg/sql/sem/tree/casts.go | 70 ++++++++++--------- pkg/sql/sem/tree/datum.go | 11 ++- pkg/sql/sem/tree/function_definition.go | 10 ++- pkg/sql/sem/tree/overload.go | 5 ++ 8 files changed, 136 insertions(+), 37 deletions(-) diff --git a/pkg/bench/ddl_analysis/orm_queries_bench_test.go b/pkg/bench/ddl_analysis/orm_queries_bench_test.go index f7e2730a4163..2de13126ebac 100644 --- a/pkg/bench/ddl_analysis/orm_queries_bench_test.go +++ b/pkg/bench/ddl_analysis/orm_queries_bench_test.go @@ -80,6 +80,61 @@ WHERE ( ) AND (n.nspname NOT IN ('pg_catalog', 'pg_toast')) ) AND pg_table_is_visible(c.oid)`, }, + + { + name: "activerecord type introspection query", + stmt: `SELECT + t.oid, t.typname, t.typelem, t.typdelim, t.typinput, r.rngsubtype, t.typtype, t.typbasetype +FROM + pg_type AS t LEFT JOIN pg_range AS r ON oid = rngtypid +WHERE + t.typname + IN ( + 'int2', + 'int4', + 'int8', + 'oid', + 'float4', + 'float8', + 'text', + 'varchar', + 'char', + 'name', + 'bpchar', + 'bool', + 'bit', + 'varbit', + 'timestamptz', + 'date', + 'money', + 'bytea', + 'point', + 'hstore', + 'json', + 'jsonb', + 'cidr', + 'inet', + 'uuid', + 'xml', + 'tsvector', + 'macaddr', + 'citext', + 'ltree', + 'line', + 'lseg', + 'box', + 'path', + 'polygon', + 'circle', + 'interval', + 'time', + 'timestamp', + 'numeric' + ) + OR t.typtype IN ('r', 'e', 'd') + OR t.typinput = 'array_in(cstring,oid,integer)'::REGPROCEDURE + OR t.typelem != 0`, + }, } RunRoundTripBenchmark(b, tests) diff --git a/pkg/bench/ddl_analysis/testdata/benchmark_expectations b/pkg/bench/ddl_analysis/testdata/benchmark_expectations index c50e956e4008..130525f45dd7 100644 --- a/pkg/bench/ddl_analysis/testdata/benchmark_expectations +++ b/pkg/bench/ddl_analysis/testdata/benchmark_expectations @@ -71,3 +71,4 @@ min max benchmark 2 2 ORMQueries/django_table_introspection_1_table 2 2 ORMQueries/django_table_introspection_4_tables 2 2 ORMQueries/django_table_introspection_8_tables +4 4 ORMQueries/activerecord_type_introspection_query diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index f4bf133b7ad8..45d925fe59d5 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -2761,9 +2761,6 @@ WHERE n.nspname = 'public' query error cannot access virtual schema in anonymous database SELECT count(*) FROM pg_catalog.pg_depend -query error cannot access virtual schema in anonymous database -select 'upper'::REGPROC; - statement ok SET DATABASE = test diff --git a/pkg/sql/pg_catalog.go b/pkg/sql/pg_catalog.go index fff86182b0c2..d62f580b5e8b 100644 --- a/pkg/sql/pg_catalog.go +++ b/pkg/sql/pg_catalog.go @@ -4156,6 +4156,24 @@ https://www.postgresql.org/docs/9.6/catalog-pg-aggregate.html`, }, } +// Populate the oid-to-builtin-function map. We have to do this operation here, +// rather than in the SQL builtins package, because the oidHasher can't live +// there. +func init() { + h := makeOidHasher() + tree.OidToBuiltinName = make(map[oid.Oid]string, len(tree.FunDefs)) + for name, def := range tree.FunDefs { + for _, o := range def.Definition { + if overload, ok := o.(*tree.Overload); ok { + builtinOid := h.BuiltinOid(name, overload) + id := oid.Oid(builtinOid.DInt) + tree.OidToBuiltinName[id] = name + overload.Oid = id + } + } + } +} + // oidHasher provides a consistent hashing mechanism for object identifiers in // pg_catalog tables, allowing for reliable joins across tables. // diff --git a/pkg/sql/sem/tree/casts.go b/pkg/sql/sem/tree/casts.go index bccbb9bac10f..406f0c57190d 100644 --- a/pkg/sql/sem/tree/casts.go +++ b/pkg/sql/sem/tree/casts.go @@ -1271,43 +1271,13 @@ func performCastWithoutPrecisionTruncation(ctx *EvalContext, d Datum, t *types.T case types.OidFamily: switch v := d.(type) { case *DOid: - switch t.Oid() { - case oid.T_oid: - return &DOid{semanticType: t, DInt: v.DInt}, nil - case oid.T_regtype: - // Mapping an oid to a regtype is easy: we have a hardcoded map. - typ, ok := types.OidToType[oid.Oid(v.DInt)] - ret := &DOid{semanticType: t, DInt: v.DInt} - if !ok { - return ret, nil - } - ret.name = typ.PGName() - return ret, nil - default: - oid, err := queryOid(ctx, t, v) - if err != nil { - oid = NewDOid(v.DInt) - oid.semanticType = t - } - return oid, nil - } + return performIntToOidCast(ctx, t, v.DInt) case *DInt: // OIDs are always unsigned 32-bit integers. Some languages, like Java, // store OIDs as signed 32-bit integers, so we implement the cast // by converting to a uint32 first. This matches Postgres behavior. i := DInt(uint32(*v)) - switch t.Oid() { - case oid.T_oid: - return &DOid{semanticType: t, DInt: i}, nil - default: - tmpOid := NewDOid(i) - oid, err := queryOid(ctx, t, tmpOid) - if err != nil { - oid = tmpOid - oid.semanticType = t - } - return oid, nil - } + return performIntToOidCast(ctx, t, i) case *DString: return ParseDOid(ctx, string(*v), t) } @@ -1316,3 +1286,39 @@ func performCastWithoutPrecisionTruncation(ctx *EvalContext, d Datum, t *types.T return nil, pgerror.Newf( pgcode.CannotCoerce, "invalid cast: %s -> %s", d.ResolvedType(), t) } + +// performIntToOidCast casts the input integer to the OID type given by the +// input types.T. +func performIntToOidCast(ctx *EvalContext, t *types.T, v DInt) (Datum, error) { + switch t.Oid() { + case oid.T_oid: + return &DOid{semanticType: t, DInt: v}, nil + case oid.T_regtype: + // Mapping an oid to a regtype is easy: we have a hardcoded map. + typ, ok := types.OidToType[oid.Oid(v)] + ret := &DOid{semanticType: t, DInt: v} + if !ok { + return ret, nil + } + ret.name = typ.PGName() + return ret, nil + + case oid.T_regproc, oid.T_regprocedure: + // Mapping an oid to a regproc is easy: we have a hardcoded map. + name, ok := OidToBuiltinName[oid.Oid(v)] + ret := &DOid{semanticType: t, DInt: v} + if !ok { + return ret, nil + } + ret.name = name + return ret, nil + + default: + oid, err := queryOid(ctx, t, NewDOid(v)) + if err != nil { + oid = NewDOid(v) + oid.semanticType = t + } + return oid, nil + } +} diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go index c15ffe4d24eb..9cd7394066f6 100644 --- a/pkg/sql/sem/tree/datum.go +++ b/pkg/sql/sem/tree/datum.go @@ -4272,7 +4272,16 @@ func ParseDOid(ctx *EvalContext, s string, t *types.T) (*DOid, error) { if err != nil { return nil, err } - return queryOid(ctx, t, NewDString(funcDef.Name)) + if len(funcDef.Definition) > 1 { + return nil, pgerror.Newf(pgcode.AmbiguousAlias, + "more than one function named '%s'", funcDef.Name) + } + def := funcDef.Definition[0] + overload, ok := def.(*Overload) + if !ok { + return nil, errors.AssertionFailedf("invalid non-overload regproc %s", funcDef.Name) + } + return &DOid{semanticType: t, DInt: DInt(overload.Oid), name: funcDef.Name}, nil case oid.T_regtype: parsedTyp, err := ctx.Planner.GetTypeFromValidSQLSyntax(s) if err == nil { diff --git a/pkg/sql/sem/tree/function_definition.go b/pkg/sql/sem/tree/function_definition.go index 0c8d2339e77d..3a0c76f6f699 100644 --- a/pkg/sql/sem/tree/function_definition.go +++ b/pkg/sql/sem/tree/function_definition.go @@ -10,7 +10,10 @@ package tree -import "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" +import ( + "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" + "github.com/lib/pq/oid" +) // FunctionDefinition implements a reference to the (possibly several) // overloads for a built-in function. @@ -159,6 +162,11 @@ func NewFunctionDefinition( // for every builtin function. Initialized by builtins.init(). var FunDefs map[string]*FunctionDefinition +// OidToBuiltinName contains a map from the hashed OID of all builtin functions +// to their name. We populate this from the pg_catalog.go file in the sql +// package because of dependency issues: we can't use oidHasher from this file. +var OidToBuiltinName map[oid.Oid]string + // Format implements the NodeFormatter interface. func (fd *FunctionDefinition) Format(ctx *FmtCtx) { ctx.WriteString(fd.Name) diff --git a/pkg/sql/sem/tree/overload.go b/pkg/sql/sem/tree/overload.go index 87c3f5d3f51c..bcf0b2d0a23c 100644 --- a/pkg/sql/sem/tree/overload.go +++ b/pkg/sql/sem/tree/overload.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" + "github.com/lib/pq/oid" ) // SpecializedVectorizedBuiltin is used to map overloads @@ -85,6 +86,10 @@ type Overload struct { // volatility against Postgres's volatility at test time. // This should be used with caution. IgnoreVolatilityCheck bool + + // Oid is the cached oidHasher.BuiltinOid result for this Overload. It's + // populated at init-time. + Oid oid.Oid } // params implements the overloadImpl interface.