Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
61211: sql,tree: make ::REGPROC cast efficient r=jordanlewis a=jordanlewis

Previously, the ::REGPROC cast (which converts an OID of a builtin
function to its name or vice versa) was implemented extremely
inefficiently: via a query from the internal executor that ultimately
has to re-materialize every builtin into memory and filter. This can
lead to pathologic quadratic behavior for some common ORM queries.

Now, both the oid-to-name and name-to-oid directions are made into
constant-time lookups by simply preparing a map of the oid to name and
vice versa at startup time.

Closes cockroachdb#61082.

Release note (performance improvement): introspection queries that use
casts into the REGPROC pseudo-type are made much more efficient: they're
now implemented as a constant-time lookup instead of an internal query.

Release justification: low risk, high benefit changes to existing functionality

Co-authored-by: Jordan Lewis <[email protected]>
  • Loading branch information
craig[bot] and jordanlewis committed Mar 4, 2021
2 parents 69ed26b + 2712ba5 commit 2581a86
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 37 deletions.
55 changes: 55 additions & 0 deletions pkg/bench/ddl_analysis/orm_queries_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,61 @@ WHERE (
) AND (n.nspname NOT IN ('pg_catalog', 'pg_toast'))
) AND pg_table_is_visible(c.oid)`,
},

{
name: "activerecord type introspection query",
stmt: `SELECT
t.oid, t.typname, t.typelem, t.typdelim, t.typinput, r.rngsubtype, t.typtype, t.typbasetype
FROM
pg_type AS t LEFT JOIN pg_range AS r ON oid = rngtypid
WHERE
t.typname
IN (
'int2',
'int4',
'int8',
'oid',
'float4',
'float8',
'text',
'varchar',
'char',
'name',
'bpchar',
'bool',
'bit',
'varbit',
'timestamptz',
'date',
'money',
'bytea',
'point',
'hstore',
'json',
'jsonb',
'cidr',
'inet',
'uuid',
'xml',
'tsvector',
'macaddr',
'citext',
'ltree',
'line',
'lseg',
'box',
'path',
'polygon',
'circle',
'interval',
'time',
'timestamp',
'numeric'
)
OR t.typtype IN ('r', 'e', 'd')
OR t.typinput = 'array_in(cstring,oid,integer)'::REGPROCEDURE
OR t.typelem != 0`,
},
}

RunRoundTripBenchmark(b, tests)
Expand Down
1 change: 1 addition & 0 deletions pkg/bench/ddl_analysis/testdata/benchmark_expectations
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,4 @@ min max benchmark
2 2 ORMQueries/django_table_introspection_1_table
2 2 ORMQueries/django_table_introspection_4_tables
2 2 ORMQueries/django_table_introspection_8_tables
4 4 ORMQueries/activerecord_type_introspection_query
3 changes: 0 additions & 3 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2761,9 +2761,6 @@ WHERE n.nspname = 'public'
query error cannot access virtual schema in anonymous database
SELECT count(*) FROM pg_catalog.pg_depend

query error cannot access virtual schema in anonymous database
select 'upper'::REGPROC;

statement ok
SET DATABASE = test

Expand Down
18 changes: 18 additions & 0 deletions pkg/sql/pg_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -4158,6 +4158,24 @@ https://www.postgresql.org/docs/9.6/catalog-pg-aggregate.html`,
},
}

// Populate the oid-to-builtin-function map. We have to do this operation here,
// rather than in the SQL builtins package, because the oidHasher can't live
// there.
func init() {
h := makeOidHasher()
tree.OidToBuiltinName = make(map[oid.Oid]string, len(tree.FunDefs))
for name, def := range tree.FunDefs {
for _, o := range def.Definition {
if overload, ok := o.(*tree.Overload); ok {
builtinOid := h.BuiltinOid(name, overload)
id := oid.Oid(builtinOid.DInt)
tree.OidToBuiltinName[id] = name
overload.Oid = id
}
}
}
}

// oidHasher provides a consistent hashing mechanism for object identifiers in
// pg_catalog tables, allowing for reliable joins across tables.
//
Expand Down
70 changes: 38 additions & 32 deletions pkg/sql/sem/tree/casts.go
Original file line number Diff line number Diff line change
Expand Up @@ -1271,43 +1271,13 @@ func performCastWithoutPrecisionTruncation(ctx *EvalContext, d Datum, t *types.T
case types.OidFamily:
switch v := d.(type) {
case *DOid:
switch t.Oid() {
case oid.T_oid:
return &DOid{semanticType: t, DInt: v.DInt}, nil
case oid.T_regtype:
// Mapping an oid to a regtype is easy: we have a hardcoded map.
typ, ok := types.OidToType[oid.Oid(v.DInt)]
ret := &DOid{semanticType: t, DInt: v.DInt}
if !ok {
return ret, nil
}
ret.name = typ.PGName()
return ret, nil
default:
oid, err := queryOid(ctx, t, v)
if err != nil {
oid = NewDOid(v.DInt)
oid.semanticType = t
}
return oid, nil
}
return performIntToOidCast(ctx, t, v.DInt)
case *DInt:
// OIDs are always unsigned 32-bit integers. Some languages, like Java,
// store OIDs as signed 32-bit integers, so we implement the cast
// by converting to a uint32 first. This matches Postgres behavior.
i := DInt(uint32(*v))
switch t.Oid() {
case oid.T_oid:
return &DOid{semanticType: t, DInt: i}, nil
default:
tmpOid := NewDOid(i)
oid, err := queryOid(ctx, t, tmpOid)
if err != nil {
oid = tmpOid
oid.semanticType = t
}
return oid, nil
}
return performIntToOidCast(ctx, t, i)
case *DString:
return ParseDOid(ctx, string(*v), t)
}
Expand All @@ -1316,3 +1286,39 @@ func performCastWithoutPrecisionTruncation(ctx *EvalContext, d Datum, t *types.T
return nil, pgerror.Newf(
pgcode.CannotCoerce, "invalid cast: %s -> %s", d.ResolvedType(), t)
}

// performIntToOidCast casts the input integer to the OID type given by the
// input types.T.
func performIntToOidCast(ctx *EvalContext, t *types.T, v DInt) (Datum, error) {
switch t.Oid() {
case oid.T_oid:
return &DOid{semanticType: t, DInt: v}, nil
case oid.T_regtype:
// Mapping an oid to a regtype is easy: we have a hardcoded map.
typ, ok := types.OidToType[oid.Oid(v)]
ret := &DOid{semanticType: t, DInt: v}
if !ok {
return ret, nil
}
ret.name = typ.PGName()
return ret, nil

case oid.T_regproc, oid.T_regprocedure:
// Mapping an oid to a regproc is easy: we have a hardcoded map.
name, ok := OidToBuiltinName[oid.Oid(v)]
ret := &DOid{semanticType: t, DInt: v}
if !ok {
return ret, nil
}
ret.name = name
return ret, nil

default:
oid, err := queryOid(ctx, t, NewDOid(v))
if err != nil {
oid = NewDOid(v)
oid.semanticType = t
}
return oid, nil
}
}
11 changes: 10 additions & 1 deletion pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -4272,7 +4272,16 @@ func ParseDOid(ctx *EvalContext, s string, t *types.T) (*DOid, error) {
if err != nil {
return nil, err
}
return queryOid(ctx, t, NewDString(funcDef.Name))
if len(funcDef.Definition) > 1 {
return nil, pgerror.Newf(pgcode.AmbiguousAlias,
"more than one function named '%s'", funcDef.Name)
}
def := funcDef.Definition[0]
overload, ok := def.(*Overload)
if !ok {
return nil, errors.AssertionFailedf("invalid non-overload regproc %s", funcDef.Name)
}
return &DOid{semanticType: t, DInt: DInt(overload.Oid), name: funcDef.Name}, nil
case oid.T_regtype:
parsedTyp, err := ctx.Planner.GetTypeFromValidSQLSyntax(s)
if err == nil {
Expand Down
10 changes: 9 additions & 1 deletion pkg/sql/sem/tree/function_definition.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@

package tree

import "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
import (
"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
"github.com/lib/pq/oid"
)

// FunctionDefinition implements a reference to the (possibly several)
// overloads for a built-in function.
Expand Down Expand Up @@ -159,6 +162,11 @@ func NewFunctionDefinition(
// for every builtin function. Initialized by builtins.init().
var FunDefs map[string]*FunctionDefinition

// OidToBuiltinName contains a map from the hashed OID of all builtin functions
// to their name. We populate this from the pg_catalog.go file in the sql
// package because of dependency issues: we can't use oidHasher from this file.
var OidToBuiltinName map[oid.Oid]string

// Format implements the NodeFormatter interface.
func (fd *FunctionDefinition) Format(ctx *FmtCtx) {
ctx.WriteString(fd.Name)
Expand Down
5 changes: 5 additions & 0 deletions pkg/sql/sem/tree/overload.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
"github.com/lib/pq/oid"
)

// SpecializedVectorizedBuiltin is used to map overloads
Expand Down Expand Up @@ -85,6 +86,10 @@ type Overload struct {
// volatility against Postgres's volatility at test time.
// This should be used with caution.
IgnoreVolatilityCheck bool

// Oid is the cached oidHasher.BuiltinOid result for this Overload. It's
// populated at init-time.
Oid oid.Oid
}

// params implements the overloadImpl interface.
Expand Down

0 comments on commit 2581a86

Please sign in to comment.