48556: sql: enable enum values to be stored in tables and used in queries r=rohany a=rohany

This PR enables the use of ENUM types in SQL queries and tables.

Release note (sql change): This PR enables the use of Postgres
ENUM types. It allows ENUMs to be used in queries and stored
in tables.

48588: rowcontainer: row container for inverted index computations r=sumeerbhola a=sumeerbhola

DiskBackedNumberedRowContainer is a container that stores
index => row mappings, where the index is densely numbered
starting from 0.
It has two uses:
- It can be used for joinReader (lookup join) to replace the
  current use of DiskBackedIndexedRowContainer.
- It can additionally de-duplicate the rows written to it by key
  (where all the columns in the row are in the key). This is the
  mode in which it will be used for inverted index computations,
  as sketched below.
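
A minimal in-memory sketch of that contract, with hypothetical names
and Go-native types standing in for the real row representation and
key encoding (the actual container also spills to disk):

```go
package sketch

import "strings"

// numberedContainer sketches the index => row contract: AddRow assigns the
// next dense index starting from 0, and AddRowWithDedup returns the index
// of a previously added equal row instead of storing a second copy.
type numberedContainer struct {
	rows   [][]string     // index => row
	rowIdx map[string]int // encoded row => index, only in de-duplication mode
}

func newNumberedContainer(dedup bool) *numberedContainer {
	c := &numberedContainer{}
	if dedup {
		c.rowIdx = make(map[string]int)
	}
	return c
}

// AddRow appends the row and returns its dense index.
func (c *numberedContainer) AddRow(row []string) int {
	c.rows = append(c.rows, row)
	return len(c.rows) - 1
}

// AddRowWithDedup returns the existing index if an equal row was added
// before; all columns of the row participate in the key.
func (c *numberedContainer) AddRowWithDedup(row []string) int {
	key := strings.Join(row, "\x00") // stand-in for the real key encoding
	if idx, ok := c.rowIdx[key]; ok {
		return idx
	}
	idx := c.AddRow(row)
	c.rowIdx[key] = idx
	return idx
}
```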

The index => row mapping is stored using a DiskBackedRowContainer,
since the latter already has most of the needed support for
first writing to memory and then spilling to disk -- the key
is set to be empty. After spilling, when DiskRowContainer is the
underlying container, we rely on the RowID it introduces. For reading
from the DiskRowContainer, there is a new numberedRowIterator that
supports a seekToIndex() method, which can be used to avoid iterating
over many intermediate sstable blocks.
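
For illustration, a sketch of the seek-or-next choice against such an
iterator; the interface and the adjacency heuristic here are
assumptions for this sketch, not the actual implementation:

```go
package sketch

// numberedIter is an assumed interface over rows stored in index order:
// Next steps to the adjacent index cheaply, while SeekToIndex jumps
// directly to a target index without iterating the blocks in between.
type numberedIter interface {
	SeekToIndex(idx int)
	Next()
	Row() []byte
}

// readIndex positions the iterator, currently at index cur, on target.
// Stepping is preferred for the adjacent index; seeking avoids scanning
// every intermediate block when the target is far away.
func readIndex(it numberedIter, cur, target int) []byte {
	if target == cur+1 {
		it.Next()
	} else {
		it.SeekToIndex(target)
	}
	return it.Row()
}
```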

The DiskBackedNumberedRowContainer has a numberedDiskRowIterator
that uses the aforementioned numberedRowIterator and decides when to
seek or next. Additionally, it contains an in-memory cache --
motivated by the cache misses observed in issue 48118, this cache
exploits the full future access pattern, which is known in advance
for both the joinReader and inverted index cases, to make optimal
eviction decisions. This caching algorithm has not been
experimentally validated, and it may not improve cache hit rates in
practice (though it will be significantly better than LRU whenever
the reuse distance exceeds the cache size). It has more book-keeping
overhead, so some experimental validation will be done before
integrating with joinReader.
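
As a sketch of the idea (not the actual code), eviction with a fully
known future access pattern is the classical farthest-in-future
policy:

```go
package sketch

// evictVictim picks the cached index whose next access is farthest in the
// future (or never occurs again), given the remaining accesses future[pos:].
// With the whole access pattern known up front this choice is optimal,
// whereas LRU degrades once the reuse distance exceeds the cache size.
func evictVictim(cached map[int]struct{}, future []int, pos int) int {
	victim, farthest := -1, -1
	for idx := range cached {
		next := len(future) // not accessed again: a perfect victim
		for i := pos; i < len(future); i++ {
			if future[i] == idx {
				next = i
				break
			}
		}
		if next > farthest {
			victim, farthest = idx, next
		}
	}
	return victim
}
```

A real implementation would precompute next-use positions rather than
rescanning the future on every eviction; that bookkeeping is part of
the overhead mentioned above.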

For de-duplication, needed for inverted index computations,
one additionally needs a row => index map.
This is constructed by modifying DiskBackedRowContainer and
introducing a de-duplication mode. To make this work without
flushing the batch after each write, we maintain a map[string]uint64
for the rows that are currently in the batch. The coordination
between this map and the batch flushes required the introduction of
SortedDiskMapBatchWriter.NumPutsSinceFlush().
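
A sketch of that coordination, assuming a Put method on the writer
(not shown in the interface hunk below), a hypothetical lookup helper
for already-flushed data, and that NumPutsSinceFlush returning 0
right after a Put signals an automatic flush:

```go
package sketch

import "encoding/binary"

// batchWriter is the assumed slice of SortedDiskMapBatchWriter used here.
type batchWriter interface {
	Put(k, v []byte) error
	NumPutsSinceFlush() int
}

// addRowDedup assigns nextIdx to key unless the key was already written.
// Rows sitting in the unflushed batch are invisible to reads of the
// underlying store, so inBatch tracks them; once the writer auto-flushes,
// the map no longer mirrors the batch and must be cleared.
func addRowDedup(
	w batchWriter,
	inBatch map[string]uint64,
	lookupFlushed func(key string) (uint64, bool),
	key string,
	nextIdx uint64,
) (uint64, error) {
	if idx, ok := inBatch[key]; ok {
		return idx, nil // duplicate still buffered in the batch
	}
	if idx, ok := lookupFlushed(key); ok {
		return idx, nil // duplicate already flushed to the store
	}
	idxBytes := make([]byte, 8)
	binary.BigEndian.PutUint64(idxBytes, nextIdx)
	if err := w.Put([]byte(key), idxBytes); err != nil {
		return 0, err
	}
	if w.NumPutsSinceFlush() == 0 {
		// The Put triggered an automatic flush: the batch is now empty.
		for k := range inBatch {
			delete(inBatch, k)
		}
	} else {
		inBatch[key] = nextIdx
	}
	return nextIdx, nil
}
```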

This code currently lacks tests -- I am looking for early feedback.
It includes a strawman integration with joinReader, which will be
removed before merging.

Release note: None

48763: roachtest: add expected passes to pgjdbc test r=apantel a=rafiss

Release note: None

Co-authored-by: Rohan Yadav <[email protected]>
Co-authored-by: sumeerbhola <[email protected]>
Co-authored-by: Rafi Shamim <[email protected]>
4 people committed May 19, 2020
4 parents 516b9ec + faac86a + b9c758b + b1e38cb commit ef3fe57
Showing 38 changed files with 2,024 additions and 186 deletions.
7 changes: 4 additions & 3 deletions docs/generated/sql/bnf/stmt_block.bnf
@@ -1587,6 +1587,7 @@ case_expr ::=

simple_typename ::=
general_type_name
| '@' iconst32
| complex_type_name
| const_typename
| bit_with_length
@@ -1925,6 +1926,9 @@ case_default ::=
general_type_name ::=
type_function_name_no_crdb_extra

iconst32 ::=
'ICONST'

complex_type_name ::=
general_type_name '.' unrestricted_name
| general_type_name '.' unrestricted_name '.' unrestricted_name
@@ -2163,9 +2167,6 @@ opt_interval_qualifier ::=
interval_qualifier
|

iconst32 ::=
'ICONST'

func_application ::=
func_name '(' ')'
| func_name '(' expr_list opt_sort_clause ')'
8 changes: 8 additions & 0 deletions pkg/ccl/backupccl/backup_planning.go
@@ -34,6 +34,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/storage/cloud"
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/interval"
"github.com/cockroachdb/cockroach/pkg/util/log"
@@ -356,6 +357,13 @@ func backupPlanHook(
}
tables = append(tables, tableDesc)

// If the table has any user defined types, error out.
for _, col := range tableDesc.Columns {
if col.Type.UserDefined() {
return unimplemented.NewWithIssue(48689, "user defined types in backup")
}
}

// Collect all the table stats for this table.
tableStatisticsAcc, err := statsCache.GetTableStats(ctx, tableDesc.GetID())
if err != nil {
8 changes: 8 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/restore
@@ -3,3 +3,11 @@
# Check that we get through parsing and license check.
statement error pq: failed to open backup storage location: unsupported storage scheme: ""
RESTORE foo FROM "bar"

# Check that user defined types are disallowed in backups.
statement ok
CREATE TYPE t AS ENUM ('hello');
CREATE TABLE tt (x t)

statement error pq: unimplemented: user defined types in backup
BACKUP TABLE tt TO ""
9 changes: 5 additions & 4 deletions pkg/cmd/roachtest/pgjdbc.go
@@ -178,10 +178,11 @@ func registerPgjdbc(r *testRegistry) {
}

r.Add(testSpec{
Name: "pgjdbc",
Owner: OwnerAppDev,
Cluster: makeClusterSpec(1),
Tags: []string{`default`, `driver`},
MinVersion: "v2.1.0",
Name: "pgjdbc",
Owner: OwnerAppDev,
Cluster: makeClusterSpec(1),
Tags: []string{`default`, `driver`},
Run: func(ctx context.Context, t *test, c *cluster) {
runPgjdbc(ctx, t, c)
},
207 changes: 79 additions & 128 deletions pkg/cmd/roachtest/pgjdbc_blacklist.go

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pkg/kv/kvserver/diskmap/disk_map.go
@@ -70,7 +70,8 @@ type SortedDiskMapBatchWriter interface {
// Flush flushes all writes to the underlying store. The batch can be reused
// after a call to Flush().
Flush() error

// NumPutsSinceFlush returns the number of Put calls since the last
// time the writer was flushed.
NumPutsSinceFlush() int
// Close flushes all writes to the underlying store and frees up resources
// held by the batch writer.
Close(context.Context) error
1 change: 1 addition & 0 deletions pkg/server/server_sql.go
@@ -412,6 +412,7 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*sqlServer, error) {
cfg.gossip,
cfg.db,
cfg.circularInternalExecutor,
codec,
),

// Note: don't forget to add the secondary loggers as closers
4 changes: 4 additions & 0 deletions pkg/sql/create_stats.go
@@ -397,6 +397,10 @@ func (r *createStatsResumer) Resume(

dsp := p.DistSQLPlanner()
if err := p.ExecCfg().DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
// Set the transaction on the EvalContext to this txn. This allows for
// use of the txn during processor setup during the execution of the flow.
evalCtx.Txn = txn

if details.AsOf != nil {
p.semaCtx.AsOfTimestamp = details.AsOf
p.extendedEvalCtx.SetTxnTimestamp(details.AsOf.GoTime())
1 change: 1 addition & 0 deletions pkg/sql/execinfra/expr.go
@@ -158,6 +158,7 @@ func (eh *ExprHelper) Init(
}
var err error
semaContext := tree.MakeSemaContext()
semaContext.TypeResolver = evalCtx.DistSQLTypeResolver
eh.Expr, err = processExpression(expr, evalCtx, &semaContext, &eh.Vars)
if err != nil {
return err
4 changes: 3 additions & 1 deletion pkg/sql/execinfra/flow_context.go
@@ -70,7 +70,9 @@ type FlowCtx struct {
// them at runtime to ensure expressions are evaluated with the correct indexed
// var context.
func (ctx *FlowCtx) NewEvalCtx() *tree.EvalContext {
return ctx.EvalCtx.Copy()
evalCopy := ctx.EvalCtx.Copy()
evalCopy.DistSQLTypeResolver = &execinfrapb.DistSQLTypeResolver{EvalContext: evalCopy}
return evalCopy
}

// TestingKnobs returns the distsql testing knobs for this flow context.
6 changes: 6 additions & 0 deletions pkg/sql/execinfra/processorsbase.go
@@ -796,6 +796,12 @@ func (pb *ProcessorBase) InitWithEvalCtx(
pb.MemMonitor = memMonitor
pb.trailingMetaCallback = opts.TrailingMetaCallback
pb.inputsToDrain = opts.InputsToDrain

// Hydrate all types used in the processor.
if err := execinfrapb.HydrateTypeSlice(evalCtx, types); err != nil {
return err
}

return pb.Out.Init(post, types, pb.EvalCtx, output)
}

67 changes: 65 additions & 2 deletions pkg/sql/execinfrapb/data.go
@@ -18,6 +18,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
@@ -71,11 +72,73 @@ func ConvertToMappedSpecOrdering(
return specOrdering
}

// DistSQLTypeResolver implements tree.ResolvableTypeReference for accessing
// type information during DistSQL query evaluation.
type DistSQLTypeResolver struct {
EvalContext *tree.EvalContext
// TODO (rohany): This struct should locally cache id -> types.T here
// so that repeated lookups do not incur additional KV operations.
}

// ResolveType implements tree.ResolvableTypeReference.
func (tr *DistSQLTypeResolver) ResolveType(name *tree.UnresolvedObjectName) (*types.T, error) {
return nil, errors.AssertionFailedf("cannot resolve types in DistSQL by name")
}

// ResolveTypeByID implements tree.ResolvableTypeReference.
func (tr *DistSQLTypeResolver) ResolveTypeByID(id uint32) (*types.T, error) {
// TODO (rohany): This should eventually look into the set of cached type
// descriptors before attempting to access it here.
typDesc, err := sqlbase.GetTypeDescFromID(
tr.EvalContext.Context,
tr.EvalContext.Txn,
tr.EvalContext.Codec,
sqlbase.ID(id),
)
if err != nil {
return nil, err
}
var typ *types.T
switch t := typDesc.Kind; t {
case sqlbase.TypeDescriptor_ENUM:
typ = types.MakeEnum(id)
default:
return nil, errors.AssertionFailedf("unknown type kind %s", t)
}
if err := typDesc.HydrateTypeInfo(typ); err != nil {
return nil, err
}
return typ, nil
}

// HydrateTypeSlice hydrates all user defined types in an input slice of types.
func HydrateTypeSlice(evalCtx *tree.EvalContext, typs []*types.T) error {
for _, t := range typs {
if t.UserDefined() {
// TODO (rohany): This should eventually look into the set of cached type
// descriptors before attempting to access it here.
typDesc, err := sqlbase.GetTypeDescFromID(
evalCtx.Context,
evalCtx.Txn,
evalCtx.Codec,
sqlbase.ID(t.StableTypeID()),
)
if err != nil {
return err
}
if err := typDesc.HydrateTypeInfo(t); err != nil {
return err
}
}
}
return nil
}

// ExprFmtCtxBase produces a FmtCtx used for serializing expressions; a proper
// IndexedVar formatting function needs to be added on. It replaces placeholders
// with their values.
func ExprFmtCtxBase(evalCtx *tree.EvalContext) *tree.FmtCtx {
fmtCtx := tree.NewFmtCtx(tree.FmtCheckEquivalence)
fmtCtx := tree.NewFmtCtx(tree.FmtDistSQLSerialization)
fmtCtx.SetPlaceholderFormat(
func(fmtCtx *tree.FmtCtx, p *tree.Placeholder) {
d, err := p.Eval(evalCtx)
@@ -115,7 +178,7 @@ func (e *Expression) Empty() bool {
// String implements the Stringer interface.
func (e Expression) String() string {
if e.LocalExpr != nil {
ctx := tree.NewFmtCtx(tree.FmtCheckEquivalence)
ctx := tree.NewFmtCtx(tree.FmtDistSQLSerialization)
ctx.FormatNode(e.LocalExpr)
return ctx.CloseAndGetString()
}