From f58e24873e26e82f6a55d910887de548ecd1b214 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Tue, 29 Nov 2022 17:37:15 -0800 Subject: [PATCH 1/3] sql: fix left semi and left anti virtual lookup joins This commit fixes the execution of the left semi and left anti virtual lookup joins. The bug was that we forgot to project away the looked up columns (coming from the "right" side) which could then lead to wrong columns being used higher up the tree. The bug was introduced during 22.1 release cycle where we added the optimizer support for generating plans that could contain left semi and left anti virtual lookup joins. This commit fixes that issue as well as the output columns of such joins (I'm not sure whether there is a user facing impact of having incorrect "output columns"). Additionally, this commit fixes the execution of these virtual lookup joins to correctly return the input row only once. Previously, for left anti joins we'd be producing an output row if there was a match (which is wrong), and for both left semi and left anti we would emit an output row every time there was a match (but this should be done only once). (Although I'm not sure whether it is possible for virtual indexes to result in multiple looked up rows.) Also, as a minor simplification this commit makes it so that the output rows are not added into the row container for left semi and left anti and the container is not instantiated at all. Release note (bug fix): CockroachDB previously could incorrectly evaluate queries that performed left semi and left anti "virtual lookup" joins on tables in `pg_catalog` or `information_schema`. These join types can be planned when a subquery is used inside of a filter condition. The bug was introduced in 22.1.0 and is now fixed. --- .../logictest/testdata/logic_test/pg_catalog | 36 +++++++++++ pkg/sql/opt/xform/join_funcs.go | 1 + pkg/sql/opt_exec_factory.go | 16 +++-- pkg/sql/virtual_table.go | 62 +++++++++++++++---- 4 files changed, 98 insertions(+), 17 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 6e89d4971930..e1548cae2159 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -4674,3 +4674,39 @@ order by attnum; attnum attname 1 x 2 y + +# Regression test for not projecting away looked up columns by the left semi +# virtual lookup join (#91012). +statement ok +CREATE TABLE t91012 (id INT, a_id INT); + +query I +SELECT + count(*) +FROM + pg_class AS t INNER JOIN pg_attribute AS a ON t.oid = a.attrelid +WHERE + a.attnotnull = 'f' + AND a.attname = 'a_id' + AND t.relname = 't91012' + AND a.atttypid + IN (SELECT oid FROM pg_type WHERE typname = ANY (ARRAY['int8'])); +---- +1 + +# Regression test for incorrectly handling left anti virtual lookup joins +# (#88096). +statement ok +CREATE TYPE mytype AS enum('hello') + +query I +SELECT + count(*) +FROM + pg_type AS t +WHERE + t.typrelid = 0 + AND NOT EXISTS(SELECT 1 FROM pg_type AS el WHERE el.oid = t.typelem AND el.typarray = t.oid) + AND t.typname LIKE 'myt%' +---- +1 diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index 8260993153e3..f2e58c36df5a 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -292,6 +292,7 @@ func (c *CustomFuncs) GenerateLookupJoins( // // It should be possible to support semi- and anti- joins. Left joins may be // possible with additional complexity. +// TODO(mgartner): update this comment. // // It should also be possible to support cases where all the virtual columns are // not covered by a single index by wrapping the lookup join in a Project that diff --git a/pkg/sql/opt_exec_factory.go b/pkg/sql/opt_exec_factory.go index bb85c5491ada..0f1987abb886 100644 --- a/pkg/sql/opt_exec_factory.go +++ b/pkg/sql/opt_exec_factory.go @@ -790,10 +790,18 @@ func (ef *execFactory) constructVirtualTableLookupJoin( tableScan.index = idx vtableCols := colinfo.ResultColumnsFromColumns(tableDesc.GetID(), tableDesc.PublicColumns()) projectedVtableCols := planColumns(&tableScan) - outputCols := make(colinfo.ResultColumns, 0, len(inputCols)+len(projectedVtableCols)) - outputCols = append(outputCols, inputCols...) - outputCols = append(outputCols, projectedVtableCols...) - // joinType is either INNER or LEFT_OUTER. + var outputCols colinfo.ResultColumns + switch joinType { + case descpb.InnerJoin, descpb.LeftOuterJoin: + outputCols = make(colinfo.ResultColumns, 0, len(inputCols)+len(projectedVtableCols)) + outputCols = append(outputCols, inputCols...) + outputCols = append(outputCols, projectedVtableCols...) + case descpb.LeftSemiJoin, descpb.LeftAntiJoin: + outputCols = make(colinfo.ResultColumns, 0, len(inputCols)) + outputCols = append(outputCols, inputCols...) + default: + return nil, errors.AssertionFailedf("unexpected join type for virtual lookup join: %s", joinType.String()) + } pred := makePredicate(joinType, inputCols, projectedVtableCols) pred.onCond = pred.iVarHelper.Rebind(onCond) n := &vTableLookupJoinNode{ diff --git a/pkg/sql/virtual_table.go b/pkg/sql/virtual_table.go index f38054cd3584..3c7fa3790aa2 100644 --- a/pkg/sql/virtual_table.go +++ b/pkg/sql/virtual_table.go @@ -235,9 +235,13 @@ type vTableLookupJoinNode struct { // run contains the runtime state of this planNode. run struct { + // matched indicates whether the current input row had at least one + // match. + matched bool // row contains the next row to output. row tree.Datums - // rows contains the next rows to output, except for row. + // rows contains the next rows to output, except for row. Only allocated + // for inner and left outer joins. rows *rowcontainer.RowContainer keyCtx constraint.KeyContext @@ -257,10 +261,14 @@ var _ rowPusher = &vTableLookupJoinNode{} // startExec implements the planNode interface. func (v *vTableLookupJoinNode) startExec(params runParams) error { v.run.keyCtx = constraint.KeyContext{EvalCtx: params.EvalContext()} - v.run.rows = rowcontainer.NewRowContainer( - params.p.Mon().MakeBoundAccount(), - colinfo.ColTypeInfoFromResCols(v.columns), - ) + if v.joinType == descpb.InnerJoin || v.joinType == descpb.LeftOuterJoin { + v.run.rows = rowcontainer.NewRowContainer( + params.p.Mon().MakeBoundAccount(), + colinfo.ColTypeInfoFromResCols(v.columns), + ) + } else if v.joinType != descpb.LeftSemiJoin && v.joinType != descpb.LeftAntiJoin { + return errors.AssertionFailedf("unexpected join type for virtual lookup join: %s", v.joinType.String()) + } v.run.indexKeyDatums = make(tree.Datums, len(v.columns)) var err error db, err := params.p.Descriptors().GetImmutableDatabaseByName( @@ -285,7 +293,7 @@ func (v *vTableLookupJoinNode) Next(params runParams) (bool, error) { v.run.params = ¶ms for { // Check if there are any rows left to emit from the last input row. - if v.run.rows.Len() > 0 { + if v.run.rows != nil && v.run.rows.Len() > 0 { copy(v.run.row, v.run.rows.At(0)) v.run.rows.PopFirst(params.ctx) return true, nil @@ -315,18 +323,38 @@ func (v *vTableLookupJoinNode) Next(params runParams) (bool, error) { ) // Add the input row to the left of the scratch row. v.run.row = append(v.run.row[:0], inputRow...) + v.run.matched = false // Finally, we're ready to do the lookup. This invocation will push all of // the looked-up rows into v.run.rows. if err := genFunc(params.ctx, v); err != nil { return false, err } - if v.run.rows.Len() == 0 && v.joinType == descpb.LeftOuterJoin { - // No matches - construct an outer match. - v.run.row = v.run.row[:len(v.inputCols)] - for i := len(inputRow); i < len(v.columns); i++ { - v.run.row = append(v.run.row, tree.DNull) + switch v.joinType { + case descpb.LeftOuterJoin: + if !v.run.matched { + // No matches - construct an outer match. + v.run.row = v.run.row[:len(v.inputCols)] + for i := len(inputRow); i < len(v.columns); i++ { + v.run.row = append(v.run.row, tree.DNull) + } + return true, nil + } + case descpb.LeftSemiJoin: + if v.run.matched { + // This input row had a match, so it should be emitted. + // + // Reset our output row to just the contents of the input row. + v.run.row = v.run.row[:len(v.inputCols)] + return true, nil + } + case descpb.LeftAntiJoin: + if !v.run.matched { + // This input row didn't have a match, so it should be emitted. + // + // Reset our output row to just the contents of the input row. + v.run.row = v.run.row[:len(v.inputCols)] + return true, nil } - return true, nil } } } @@ -350,6 +378,12 @@ func (v *vTableLookupJoinNode) pushRow(lookedUpRow ...tree.Datum) error { ); !ok || err != nil { return err } + v.run.matched = true + if v.joinType == descpb.LeftSemiJoin || v.joinType == descpb.LeftAntiJoin { + // Avoid adding the row into the container since for left semi and left + // anti joins we only care to know whether there was a match or not. + return nil + } _, err := v.run.rows.AddRow(v.run.params.ctx, v.run.row) return err } @@ -362,5 +396,7 @@ func (v *vTableLookupJoinNode) Values() tree.Datums { // Close implements the planNode interface. func (v *vTableLookupJoinNode) Close(ctx context.Context) { v.input.Close(ctx) - v.run.rows.Close(ctx) + if v.run.rows != nil { + v.run.rows.Close(ctx) + } } From 9f2f119520876d4c5e54468ec63e55ab6a6c04fa Mon Sep 17 00:00:00 2001 From: healthy-pod Date: Wed, 30 Nov 2022 17:19:10 -0800 Subject: [PATCH 2/3] nogo: detect deprecated go standard library packages In #87327, we patched `staticcheck` to detect deprecated "objects" from the standard library. This patch ensures that we also detect deprecated "packages". Closes #84877 Release note: None --- build/bazelutil/nogo_config.json | 9 ++++++++- build/patches/co_honnef_go_tools.patch | 11 ++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/build/bazelutil/nogo_config.json b/build/bazelutil/nogo_config.json index 4f6f5b2f979a..34a2c9a23424 100644 --- a/build/bazelutil/nogo_config.json +++ b/build/bazelutil/nogo_config.json @@ -739,7 +739,14 @@ "pkg/sql/execinfra/processorsbase.go$": "temporary exclusion until deprecatedContext is removed from eval.Context", "pkg/sql/importer/import_table_creation.go$": "temporary exclusion until deprecatedContext is removed from eval.Context", "pkg/sql/row/expr_walker_test.go$": "temporary exclusion until deprecatedContext is removed from eval.Context", - "pkg/sql/schemachanger/scbuild/tree_context_builder.go$": "temporary exclusion until deprecatedContext is removed from eval.Context" + "pkg/sql/schemachanger/scbuild/tree_context_builder.go$": "temporary exclusion until deprecatedContext is removed from eval.Context", + "pkg/security/securityassets/security_assets.go$": "excluded until all uses of io/ioutil are replaced", + "pkg/roachprod/install/cluster_synced.go$": "excluded until all uses of io/ioutil are replaced", + "pkg/security/securitytest/securitytest.go$": "excluded until all uses of io/ioutil are replaced", + "pkg/server/dumpstore/dumpstore.go$": "excluded until all uses of io/ioutil are replaced", + "pkg/server/dumpstore/dumpstore_test.go$": "excluded until all uses of io/ioutil are replaced", + "pkg/server/heapprofiler/profilestore_test.go$": "excluded until all uses of io/ioutil are replaced", + "pkg/util/log/file_api.go$": "excluded until all uses of io/ioutil are replaced" }, "only_files": { "cockroach/pkg/.*$": "first-party code" diff --git a/build/patches/co_honnef_go_tools.patch b/build/patches/co_honnef_go_tools.patch index ea4e0b73946e..8dee2b6bb8cf 100644 --- a/build/patches/co_honnef_go_tools.patch +++ b/build/patches/co_honnef_go_tools.patch @@ -1,5 +1,5 @@ diff --git a/staticcheck/lint.go b/staticcheck/lint.go -index 85bcb21..e91e81c 100644 +index 85bcb21..9fb4cc0 100644 --- a/staticcheck/lint.go +++ b/staticcheck/lint.go @@ -3080,6 +3080,9 @@ func CheckDeprecated(pass *analysis.Pass) (interface{}, error) { @@ -44,3 +44,12 @@ index 85bcb21..e91e81c 100644 } return true } +@@ -3209,6 +3214,8 @@ func CheckDeprecated(pass *analysis.Pass) (interface{}, error) { + } + + handleDeprecation(depr, spec.Path, path, path, nil) ++ } else if _, ok := knowledge.StdlibDeprecations[path]; ok { ++ handleDeprecation(nil, spec.Path, path, path, nil) + } + } + pass.ResultOf[inspect.Analyzer].(*inspector.Inspector).Nodes(nil, fn) From 170baed74f942bb98730c7398d08e10fe5da4f71 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Thu, 1 Dec 2022 10:56:30 -0800 Subject: [PATCH 3/3] rowcontainer: address an old TODO This commit addresses an old TODO about figuring out why we cannot reuse the same buffer in `diskRowIterator.Row` method. The contract of that method was previously confusing - it says that the call to `Row` invalidates the row returned on the previous call; however, the important piece was missing - that the datums themselves cannot be mutated (this is what we assume elsewhere and perform only the "shallow" copies). This commit clarifies the contract of the method and explicitly explains why we need to allocate fresh byte slices (which is done via `ByteAllocator` to reduce the number of allocations). Additional context can be found in #43145 which added this copy in the first place. Here is a relevant quote from Alfonso: ``` I think what's going on here is that this type of contract (you may only reuse the row until the next call) is a bit unclear. `CopyRow`s of `EncDatum`s are only implemented as shallow copies, so the reference to this reuse only applies to the `EncDatumRow`, but not to the `encoded` field, which is what is rewritten in this case. The `encoded` field will not be copied, so I think the tests are probably failing due to that. This is unfortunate and there doesn't seem to be a good reason for it. To implement deep copies, we will have to implement deep copies for `Datum`s as well. ``` I think we were under the impression that we'd implement the "deep copy" in `CopyRow`, but I highly doubt we'll do so given that we're mostly investing in the columnar infrastructure nowadays, and the current way of performing shallow copying has worked well long enough. Epic: None Release note: None --- pkg/sql/rowcontainer/BUILD.bazel | 1 + pkg/sql/rowcontainer/disk_row_container.go | 29 +++++++++------------- pkg/sql/rowcontainer/row_container.go | 3 ++- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/pkg/sql/rowcontainer/BUILD.bazel b/pkg/sql/rowcontainer/BUILD.bazel index 44db8cc14039..90d87cf8de36 100644 --- a/pkg/sql/rowcontainer/BUILD.bazel +++ b/pkg/sql/rowcontainer/BUILD.bazel @@ -28,6 +28,7 @@ go_library( "//pkg/sql/sqlerrors", "//pkg/sql/types", "//pkg/util", + "//pkg/util/bufalloc", "//pkg/util/cancelchecker", "//pkg/util/encoding", "//pkg/util/hlc", diff --git a/pkg/sql/rowcontainer/disk_row_container.go b/pkg/sql/rowcontainer/disk_row_container.go index d79479501806..88cc9aad7524 100644 --- a/pkg/sql/rowcontainer/disk_row_container.go +++ b/pkg/sql/rowcontainer/disk_row_container.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/rowenc" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" + "github.com/cockroachdb/cockroach/pkg/util/bufalloc" "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/mon" @@ -394,8 +395,9 @@ func (d *DiskRowContainer) Close(ctx context.Context) { } // keyValToRow decodes a key and a value byte slice stored with AddRow() into -// a sqlbase.EncDatumRow. The returned EncDatumRow is only valid until the next -// call to keyValToRow(). +// a rowenc.EncDatumRow. The returned EncDatumRow is only valid until the next +// call to keyValToRow(). The passed in byte slices are used directly, and it is +// the caller's responsibility to make sure they don't get modified. func (d *DiskRowContainer) keyValToRow(k []byte, v []byte) (rowenc.EncDatumRow, error) { for i, orderInfo := range d.ordering { // Types with composite key encodings are decoded from the value. @@ -430,7 +432,7 @@ func (d *DiskRowContainer) keyValToRow(k []byte, v []byte) (rowenc.EncDatumRow, // diskRowIterator iterates over the rows in a DiskRowContainer. type diskRowIterator struct { rowContainer *DiskRowContainer - rowBuf []byte + rowBuf bufalloc.ByteAllocator diskmap.SortedDiskMapIterator } @@ -452,7 +454,7 @@ func (d *DiskRowContainer) NewIterator(ctx context.Context) RowIterator { return &i } -// Row returns the current row. The returned sqlbase.EncDatumRow is only valid +// Row returns the current row. The returned rowenc.EncDatumRow is only valid // until the next call to Row(). func (r *diskRowIterator) Row() (rowenc.EncDatumRow, error) { if ok, err := r.Valid(); err != nil { @@ -463,19 +465,12 @@ func (r *diskRowIterator) Row() (rowenc.EncDatumRow, error) { k := r.UnsafeKey() v := r.UnsafeValue() - // TODO(asubiotto): the "true ||" should not be necessary. We should be to - // reuse rowBuf, yet doing so causes - // TestDiskBackedIndexedRowContainer/ReorderingOnDisk, TestHashJoiner, and - // TestSorter to fail. Some caller of Row() is presumably not making a copy - // of the return value. - if true || cap(r.rowBuf) < len(k)+len(v) { - r.rowBuf = make([]byte, 0, len(k)+len(v)) - } - r.rowBuf = r.rowBuf[:len(k)+len(v)] - copy(r.rowBuf, k) - copy(r.rowBuf[len(k):], v) - k = r.rowBuf[:len(k)] - v = r.rowBuf[len(k):] + // keyValToRow will use the encoded key and value bytes as is by shoving + // them directly into the EncDatum, so we need to make a copy here. We + // cannot reuse the same byte slice across Row() calls because it would lead + // to modification of the EncDatums (which is not allowed). + r.rowBuf, k = r.rowBuf.Copy(k, len(v)) + r.rowBuf, v = r.rowBuf.Copy(v, 0 /* extraCap */) return r.rowContainer.keyValToRow(k, v) } diff --git a/pkg/sql/rowcontainer/row_container.go b/pkg/sql/rowcontainer/row_container.go index fdd7ecab2eb5..2d16b21f5f07 100644 --- a/pkg/sql/rowcontainer/row_container.go +++ b/pkg/sql/rowcontainer/row_container.go @@ -132,7 +132,8 @@ type RowIterator interface { // Next advances the iterator to the next row in the iteration. Next() // Row returns the current row. The returned row is only valid until the - // next call to Rewind() or Next(). + // next call to Rewind() or Next(). However, datums in the row won't be + // modified, so shallow copying is sufficient. Row() (rowenc.EncDatumRow, error) // Close frees up resources held by the iterator.