48556: sql: enable enum values to be stored in tables and used in queries r=rohany a=rohany

This PR enables the use of ENUM types in SQL queries and tables.

Release note (sql change): This PR enables the use of Postgres
ENUM types. It allows ENUMs to be used in queries and stored
in tables.

48588: rowcontainer: row container for inverted index computations r=sumeerbhola a=sumeerbhola

DiskBackedNumberedRowContainer is a container that stores
index => row mappings, where the index is densely numbered
starting from 0.
It has two uses:
- It can be used for joinReader (lookup join) to replace the
  current use of DiskBackedIndexedRowContainer.
- It can additionally de-duplicate the rows written to it by key
  (where all the columns in the row are in the key). This is the
  mode in which it will be used for inverted index computations,
  as sketched below.
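
A minimal in-memory sketch of that contract, with hypothetical names
and Go-native types standing in for the real row representation and
key encoding (the actual container also spills to disk):

```go
package sketch

import "strings"

// numberedContainer sketches the index => row contract: AddRow assigns the
// next dense index starting from 0, and AddRowWithDedup returns the index
// of a previously added equal row instead of storing a second copy.
type numberedContainer struct {
	rows   [][]string     // index => row
	rowIdx map[string]int // encoded row => index, only in de-duplication mode
}

func newNumberedContainer(dedup bool) *numberedContainer {
	c := &numberedContainer{}
	if dedup {
		c.rowIdx = make(map[string]int)
	}
	return c
}

// AddRow appends the row and returns its dense index.
func (c *numberedContainer) AddRow(row []string) int {
	c.rows = append(c.rows, row)
	return len(c.rows) - 1
}

// AddRowWithDedup returns the existing index if an equal row was added
// before; all columns of the row participate in the key.
func (c *numberedContainer) AddRowWithDedup(row []string) int {
	key := strings.Join(row, "\x00") // stand-in for the real key encoding
	if idx, ok := c.rowIdx[key]; ok {
		return idx
	}
	idx := c.AddRow(row)
	c.rowIdx[key] = idx
	return idx
}
```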

The index => row mapping is stored using a DiskBackedRowContainer,
since the latter already has most of the needed support for
first writing to memory and then spilling to disk -- the key
is set to be empty. After spilling, when DiskRowContainer is the
underlying container, we rely on the RowID it introduces. For reading
from the DiskRowContainer, there is a new numberedRowIterator that
supports a seekToIndex() method, which can be used to avoid iterating
over many intermediate sstable blocks.
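
For illustration, a sketch of the seek-or-next choice against such an
iterator; the interface and the adjacency heuristic here are
assumptions for this sketch, not the actual implementation:

```go
package sketch

// numberedIter is an assumed interface over rows stored in index order:
// Next steps to the adjacent index cheaply, while SeekToIndex jumps
// directly to a target index without iterating the blocks in between.
type numberedIter interface {
	SeekToIndex(idx int)
	Next()
	Row() []byte
}

// readIndex positions the iterator, currently at index cur, on target.
// Stepping is preferred for the adjacent index; seeking avoids scanning
// every intermediate block when the target is far away.
func readIndex(it numberedIter, cur, target int) []byte {
	if target == cur+1 {
		it.Next()
	} else {
		it.SeekToIndex(target)
	}
	return it.Row()
}
```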

The DiskBackedNumberedRowContainer has a numberedDiskRowIterator
that uses the aforementioned numberedRowIterator and decides when to
seek or next. Additionally, it contains an in-memory cache --
motivated by the cache misses observed in issue 48118, this cache
exploits the full future access pattern, which is known in advance
for both the joinReader and inverted index cases, to make optimal
eviction decisions. This caching algorithm has not been
experimentally validated, and it may not improve cache hit rates in
practice (though it will be significantly better than LRU whenever
the reuse distance exceeds the cache size). It has more book-keeping
overhead, so some experimental validation will be done before
integrating with joinReader.
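
As a sketch of the idea (not the actual code), eviction with a fully
known future access pattern is the classical farthest-in-future
policy:

```go
package sketch

// evictVictim picks the cached index whose next access is farthest in the
// future (or never occurs again), given the remaining accesses future[pos:].
// With the whole access pattern known up front this choice is optimal,
// whereas LRU degrades once the reuse distance exceeds the cache size.
func evictVictim(cached map[int]struct{}, future []int, pos int) int {
	victim, farthest := -1, -1
	for idx := range cached {
		next := len(future) // not accessed again: a perfect victim
		for i := pos; i < len(future); i++ {
			if future[i] == idx {
				next = i
				break
			}
		}
		if next > farthest {
			victim, farthest = idx, next
		}
	}
	return victim
}
```

A real implementation would precompute next-use positions rather than
rescanning the future on every eviction; that bookkeeping is part of
the overhead mentioned above.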

For de-duplication, needed for inverted index computations,
one additionally needs a row => index map.
This is constructed by modifying DiskBackedRowContainer and
introducing a de-duplication mode. To make this work without
flushing the batch after each write, we maintain a map[string]uint64
for the rows that are currently in the batch. The coordination
between this map and the batch flushes required the introduction of
SortedDiskMapBatchWriter.NumPutsSinceFlush().
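
A sketch of that coordination, assuming a Put method on the writer
(not shown in the interface hunk below), a hypothetical lookup helper
for already-flushed data, and that NumPutsSinceFlush returning 0
right after a Put signals an automatic flush:

```go
package sketch

import "encoding/binary"

// batchWriter is the assumed slice of SortedDiskMapBatchWriter used here.
type batchWriter interface {
	Put(k, v []byte) error
	NumPutsSinceFlush() int
}

// addRowDedup assigns nextIdx to key unless the key was already written.
// Rows sitting in the unflushed batch are invisible to reads of the
// underlying store, so inBatch tracks them; once the writer auto-flushes,
// the map no longer mirrors the batch and must be cleared.
func addRowDedup(
	w batchWriter,
	inBatch map[string]uint64,
	lookupFlushed func(key string) (uint64, bool),
	key string,
	nextIdx uint64,
) (uint64, error) {
	if idx, ok := inBatch[key]; ok {
		return idx, nil // duplicate still buffered in the batch
	}
	if idx, ok := lookupFlushed(key); ok {
		return idx, nil // duplicate already flushed to the store
	}
	idxBytes := make([]byte, 8)
	binary.BigEndian.PutUint64(idxBytes, nextIdx)
	if err := w.Put([]byte(key), idxBytes); err != nil {
		return 0, err
	}
	if w.NumPutsSinceFlush() == 0 {
		// The Put triggered an automatic flush: the batch is now empty.
		for k := range inBatch {
			delete(inBatch, k)
		}
	} else {
		inBatch[key] = nextIdx
	}
	return nextIdx, nil
}
```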

This code currently lacks tests -- I am looking for early feedback.
It includes a strawman integration with joinReader, which will be
removed before merging.

Release note: None

48763: roachtest: add expected passes to pgjdbc test r=apantel a=rafiss

Release note: None

Co-authored-by: Rohan Yadav <[email protected]>
Co-authored-by: sumeerbhola <[email protected]>
Co-authored-by: Rafi Shamim <[email protected]>
4 people committed May 19, 2020
4 parents 516b9ec + faac86a + b9c758b + b1e38cb commit ef3fe57
Showing 38 changed files with 2,024 additions and 186 deletions.
7 changes: 4 additions & 3 deletions docs/generated/sql/bnf/stmt_block.bnf
@@ -1587,6 +1587,7 @@ case_expr ::=

simple_typename ::=
general_type_name
| '@' iconst32
| complex_type_name
| const_typename
| bit_with_length
@@ -1925,6 +1926,9 @@ case_default ::=
general_type_name ::=
type_function_name_no_crdb_extra

iconst32 ::=
'ICONST'

complex_type_name ::=
general_type_name '.' unrestricted_name
| general_type_name '.' unrestricted_name '.' unrestricted_name
@@ -2163,9 +2167,6 @@ opt_interval_qualifier ::=
interval_qualifier
|

iconst32 ::=
'ICONST'

func_application ::=
func_name '(' ')'
| func_name '(' expr_list opt_sort_clause ')'
8 changes: 8 additions & 0 deletions pkg/ccl/backupccl/backup_planning.go
@@ -34,6 +34,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/storage/cloud"
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/interval"
"github.com/cockroachdb/cockroach/pkg/util/log"
@@ -356,6 +357,13 @@ func backupPlanHook(
}
tables = append(tables, tableDesc)

// If the table has any user defined types, error out.
for _, col := range tableDesc.Columns {
if col.Type.UserDefined() {
return unimplemented.NewWithIssue(48689, "user defined types in backup")
}
}

// Collect all the table stats for this table.
tableStatisticsAcc, err := statsCache.GetTableStats(ctx, tableDesc.GetID())
if err != nil {
8 changes: 8 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/restore
@@ -3,3 +3,11 @@
# Check that we get through parsing and license check.
statement error pq: failed to open backup storage location: unsupported storage scheme: ""
RESTORE foo FROM "bar"

# Check that user defined types are disallowed in backups.
statement ok
CREATE TYPE t AS ENUM ('hello');
CREATE TABLE tt (x t)

statement error pq: unimplemented: user defined types in backup
BACKUP TABLE tt TO ""
9 changes: 5 additions & 4 deletions pkg/cmd/roachtest/pgjdbc.go
@@ -178,10 +178,11 @@ func registerPgjdbc(r *testRegistry) {
}

r.Add(testSpec{
Name: "pgjdbc",
Owner: OwnerAppDev,
Cluster: makeClusterSpec(1),
Tags: []string{`default`, `driver`},
MinVersion: "v2.1.0",
Name: "pgjdbc",
Owner: OwnerAppDev,
Cluster: makeClusterSpec(1),
Tags: []string{`default`, `driver`},
Run: func(ctx context.Context, t *test, c *cluster) {
runPgjdbc(ctx, t, c)
},
207 changes: 79 additions & 128 deletions pkg/cmd/roachtest/pgjdbc_blacklist.go

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pkg/kv/kvserver/diskmap/disk_map.go
@@ -70,7 +70,8 @@ type SortedDiskMapBatchWriter interface {
// Flush flushes all writes to the underlying store. The batch can be reused
// after a call to Flush().
Flush() error

// NumPutsSinceFlush returns the number of Put calls since the last
// time the writer was flushed.
NumPutsSinceFlush() int
// Close flushes all writes to the underlying store and frees up resources
// held by the batch writer.
Close(context.Context) error
1 change: 1 addition & 0 deletions pkg/server/server_sql.go
@@ -412,6 +412,7 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*sqlServer, error) {
cfg.gossip,
cfg.db,
cfg.circularInternalExecutor,
codec,
),

// Note: don't forget to add the secondary loggers as closers
4 changes: 4 additions & 0 deletions pkg/sql/create_stats.go
@@ -397,6 +397,10 @@ func (r *createStatsResumer) Resume(

dsp := p.DistSQLPlanner()
if err := p.ExecCfg().DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
// Set the transaction on the EvalContext to this txn. This allows for
// use of the txn during processor setup during the execution of the flow.
evalCtx.Txn = txn

if details.AsOf != nil {
p.semaCtx.AsOfTimestamp = details.AsOf
p.extendedEvalCtx.SetTxnTimestamp(details.AsOf.GoTime())
1 change: 1 addition & 0 deletions pkg/sql/execinfra/expr.go
@@ -158,6 +158,7 @@ func (eh *ExprHelper) Init(
}
var err error
semaContext := tree.MakeSemaContext()
semaContext.TypeResolver = evalCtx.DistSQLTypeResolver
eh.Expr, err = processExpression(expr, evalCtx, &semaContext, &eh.Vars)
if err != nil {
return err
4 changes: 3 additions & 1 deletion pkg/sql/execinfra/flow_context.go
@@ -70,7 +70,9 @@ type FlowCtx struct {
// them at runtime to ensure expressions are evaluated with the correct indexed
// var context.
func (ctx *FlowCtx) NewEvalCtx() *tree.EvalContext {
return ctx.EvalCtx.Copy()
evalCopy := ctx.EvalCtx.Copy()
evalCopy.DistSQLTypeResolver = &execinfrapb.DistSQLTypeResolver{EvalContext: evalCopy}
return evalCopy
}

// TestingKnobs returns the distsql testing knobs for this flow context.
6 changes: 6 additions & 0 deletions pkg/sql/execinfra/processorsbase.go
@@ -796,6 +796,12 @@ func (pb *ProcessorBase) InitWithEvalCtx(
pb.MemMonitor = memMonitor
pb.trailingMetaCallback = opts.TrailingMetaCallback
pb.inputsToDrain = opts.InputsToDrain

// Hydrate all types used in the processor.
if err := execinfrapb.HydrateTypeSlice(evalCtx, types); err != nil {
return err
}

return pb.Out.Init(post, types, pb.EvalCtx, output)
}

67 changes: 65 additions & 2 deletions pkg/sql/execinfrapb/data.go
@@ -18,6 +18,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
@@ -71,11 +72,73 @@ func ConvertToMappedSpecOrdering(
return specOrdering
}

// DistSQLTypeResolver implements tree.ResolvableTypeReference for accessing
// type information during DistSQL query evaluation.
type DistSQLTypeResolver struct {
EvalContext *tree.EvalContext
// TODO (rohany): This struct should locally cache id -> types.T here
// so that repeated lookups do not incur additional KV operations.
}

// ResolveType implements tree.ResolvableTypeReference.
func (tr *DistSQLTypeResolver) ResolveType(name *tree.UnresolvedObjectName) (*types.T, error) {
return nil, errors.AssertionFailedf("cannot resolve types in DistSQL by name")
}

// ResolveTypeByID implements tree.ResolvableTypeReference.
func (tr *DistSQLTypeResolver) ResolveTypeByID(id uint32) (*types.T, error) {
// TODO (rohany): This should eventually look into the set of cached type
// descriptors before attempting to access it here.
typDesc, err := sqlbase.GetTypeDescFromID(
tr.EvalContext.Context,
tr.EvalContext.Txn,
tr.EvalContext.Codec,
sqlbase.ID(id),
)
if err != nil {
return nil, err
}
var typ *types.T
switch t := typDesc.Kind; t {
case sqlbase.TypeDescriptor_ENUM:
typ = types.MakeEnum(id)
default:
return nil, errors.AssertionFailedf("unknown type kind %s", t)
}
if err := typDesc.HydrateTypeInfo(typ); err != nil {
return nil, err
}
return typ, nil
}

// HydrateTypeSlice hydrates all user defined types in an input slice of types.
func HydrateTypeSlice(evalCtx *tree.EvalContext, typs []*types.T) error {
for _, t := range typs {
if t.UserDefined() {
// TODO (rohany): This should eventually look into the set of cached type
// descriptors before attempting to access it here.
typDesc, err := sqlbase.GetTypeDescFromID(
evalCtx.Context,
evalCtx.Txn,
evalCtx.Codec,
sqlbase.ID(t.StableTypeID()),
)
if err != nil {
return err
}
if err := typDesc.HydrateTypeInfo(t); err != nil {
return err
}
}
}
return nil
}

// ExprFmtCtxBase produces a FmtCtx used for serializing expressions; a proper
// IndexedVar formatting function needs to be added on. It replaces placeholders
// with their values.
func ExprFmtCtxBase(evalCtx *tree.EvalContext) *tree.FmtCtx {
fmtCtx := tree.NewFmtCtx(tree.FmtCheckEquivalence)
fmtCtx := tree.NewFmtCtx(tree.FmtDistSQLSerialization)
fmtCtx.SetPlaceholderFormat(
func(fmtCtx *tree.FmtCtx, p *tree.Placeholder) {
d, err := p.Eval(evalCtx)
@@ -115,7 +178,7 @@ func (e *Expression) Empty() bool {
// String implements the Stringer interface.
func (e Expression) String() string {
if e.LocalExpr != nil {
ctx := tree.NewFmtCtx(tree.FmtCheckEquivalence)
ctx := tree.NewFmtCtx(tree.FmtDistSQLSerialization)
ctx.FormatNode(e.LocalExpr)
return ctx.CloseAndGetString()
}