From ad26b2da1a5f784f867b471dc647fc6b593711a0 Mon Sep 17 00:00:00 2001 From: Peter Mattis Date: Thu, 28 Dec 2017 16:34:29 -0500 Subject: [PATCH 1/2] sql: document differing subquery behavior from Postgres Release note: None --- .../logictest/testdata/logic_test/subquery | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pkg/sql/logictest/testdata/logic_test/subquery b/pkg/sql/logictest/testdata/logic_test/subquery index d1b2411821a5..a3bfbae6b5a8 100644 --- a/pkg/sql/logictest/testdata/logic_test/subquery +++ b/pkg/sql/logictest/testdata/logic_test/subquery @@ -60,24 +60,49 @@ SELECT (SELECT 1) IN (1) ---- true +# NB: Cockroach has different behavior from Postgres on a few esoteric +# subqueries. The Cockroach behavior seems more sensical and +# supporting the specific Postgres behavior appears onerous. Fingers +# crossed this doesn't bite us down the road. + +# Postgres cannot handle this query (but MySQL can), even though it +# seems sensical: +# ERROR: subquery must return only one column +# LINE 1: select (select 1, 2) IN (select 1, 2); +# ^ query B SELECT (SELECT 1, 2) IN (SELECT 1, 2) ---- true +# Postgres cannot handle this query, even though it seems sensical: +# ERROR: subquery must return only one column +# LINE 1: select (select 1, 2) IN ((1, 2)); +# ^ query B SELECT (SELECT 1, 2) IN ((1, 2)) ---- true +# Postgres cannot handle this query, even though it seems sensical: +# ERROR: subquery has too many columns +# LINE 1: select (select (1, 2)) IN (select 1, 2); +# ^ query B SELECT (SELECT (1, 2)) IN (SELECT 1, 2) ---- true +query B +SELECT (SELECT (1, 2)) IN ((1, 2)) +---- +true + query error unsupported comparison operator: IN SELECT (SELECT 1, 2) IN (SELECT (1, 2)) +# Postgres successfully handles this query (returning true), even +# though doing so seems at odds with other rules. query error unsupported comparison operator: IN SELECT (SELECT (1, 2)) IN (SELECT (1, 2)) From 7c0651e8f6f1821e5b7b4c247a55adf85a42fc73 Mon Sep 17 00:00:00 2001 From: Peter Mattis Date: Thu, 28 Dec 2017 16:48:58 -0500 Subject: [PATCH 2/2] sql: simplify the subquery typing rules The simplification allows Cockroach to support a strict superset of the queries Postgres supports. Release note: None --- .../logictest/testdata/logic_test/subquery | 14 ++- pkg/sql/logictest/testdata/logic_test/typing | 6 +- pkg/sql/subquery.go | 94 +++++++++++-------- 3 files changed, 71 insertions(+), 43 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/subquery b/pkg/sql/logictest/testdata/logic_test/subquery index a3bfbae6b5a8..e2ca3623d33d 100644 --- a/pkg/sql/logictest/testdata/logic_test/subquery +++ b/pkg/sql/logictest/testdata/logic_test/subquery @@ -98,13 +98,19 @@ SELECT (SELECT (1, 2)) IN ((1, 2)) ---- true -query error unsupported comparison operator: IN +# Postgres cannot handle this query, even though it seems sensical: +# ERROR: subquery must return only one column +# LINE 1: select (select 1, 2) in (select (1, 2)); +# ^ +query B SELECT (SELECT 1, 2) IN (SELECT (1, 2)) +---- +true -# Postgres successfully handles this query (returning true), even -# though doing so seems at odds with other rules. -query error unsupported comparison operator: IN +query B SELECT (SELECT (1, 2)) IN (SELECT (1, 2)) +---- +true query B SELECT 1 = ANY(SELECT 1) diff --git a/pkg/sql/logictest/testdata/logic_test/typing b/pkg/sql/logictest/testdata/logic_test/typing index 83d3a17e2c29..e1d8575587c6 100644 --- a/pkg/sql/logictest/testdata/logic_test/typing +++ b/pkg/sql/logictest/testdata/logic_test/typing @@ -169,7 +169,7 @@ true statement error unsupported comparison operator: IN SELECT 1 IN (SELECT 'a') -statement error unsupported comparison operator: IN +statement error unsupported comparison operator: IN SELECT 1 IN (SELECT (1, 2)) query B @@ -177,5 +177,7 @@ SELECT (1, 2) IN (SELECT 1, 2) ---- true -statement error unsupported comparison operator: IN +query B SELECT (1, 2) IN (SELECT (1, 2)) +---- +true diff --git a/pkg/sql/subquery.go b/pkg/sql/subquery.go index 24b2a48736f0..606288817a17 100644 --- a/pkg/sql/subquery.go +++ b/pkg/sql/subquery.go @@ -378,44 +378,59 @@ func (v *subqueryVisitor) replaceSubquery( result := &subquery{subquery: sub, plan: plan} - wrap := true - if len(cols) == 1 { - if !multiRow { - // The subquery has only a single column and is not in a multi-row - // context, we don't want to wrap the type in a tuple. For example: - // - // SELECT (SELECT 1) - // - // and - // - // SELECT (SELECT (1, 2)) - // - // This will result in the types "int" and "tuple{int,int}" respectively. - wrap = false - } else if !types.FamTuple.FamilyEqual(cols[0].Typ) { - // The subquery has only a single column and is in a multi-row - // context. We only wrap if the type of the result column is not a - // tuple. For example: - // - // SELECT 1 IN (SELECT 1) - // - // The type of the subquery will be "tuple{int}". Now consider this - // invalid query: - // - // SELECT (1, 2) IN (SELECT (1, 2)) - // - // We want the type of the subquery to be "tuple{tuple{tuple{int,int}}}" - // in order to distinguish it from this semantically valid query: - // - // SELECT (1, 2) IN (SELECT 1, 2) - // - // In this query, the subquery has the type "tuple{tuple{int,int}}" - // making the IN expression valid. - wrap = false - } - } + // The typing for subqueries is complex, but regular. + // + // * If the subquery is used in a single-row context: + // + // - If the subquery returns a single column with type "U", the type of the + // subquery is the type of the column "U". For example: + // + // SELECT 1 = (SELECT 1) + // + // The type of the subquery is "int". + // + // - If the subquery returns multiple columns, the type of the subquery is + // "tuple{C}" where "C" expands to all of the types of the columns of the + // subquery. For example: + // + // SELECT (1, 'a') = (SELECT 1, 'a') + // + // The type of the subquery is "tuple{int,string}" + // + // * If the subquery is used in a multi-row context: + // + // - If the subquery returns a single column with type "U", the type of the + // subquery is the singleton tuple of type "U": "tuple{U}". For example: + // + // SELECT 1 IN (SELECT 1) + // + // The type of the subquery's columns is "int" and the type of the + // subquery is "tuple{int}". + // + // - If the subquery returns multiple columns, the type of the subquery is + // "tuple{tuple{C}}" where "C expands to all of the types of the columns + // of the subquery. For example: + // + // SELECT (1, 'a') IN (SELECT 1, 'a') + // + // The types of the subquery's columns are "int" and "string". These are + // wrapped into "tuple{int,string}" to form the row type. And these are + // wrapped again to form the subquery type "tuple{tuple{int,string}}". + // + // Note that these rules produce a somewhat surprising equivalence: + // + // SELECT (SELECT 1, 2) = (SELECT (1, 2)) + // + // A subquery which returns a single column tuple is equivalent to a subquery + // which returns the elements of the tuple as individual columns. While + // surprising, this is necessary for regularity and in order to handle: + // + // SELECT 1 IN (SELECT 1) + // + // Without that auto-unwrapping of single-column subqueries, this query would + // type check as " IN " which would fail. - if !wrap { + if len(cols) == 1 { result.typ = cols[0].Typ } else { colTypes := make(types.TTuple, len(cols)) @@ -431,6 +446,11 @@ func (v *subqueryVisitor) replaceSubquery( // SELECT 1 IN (SELECT * FROM t) // // Wrap the type in a tuple. + // + // TODO(peter): Using a tuple type to represent a multi-row subquery works + // with the current type checking code, but seems semantically incorrect. A + // tuple represents a fixed number of elements. Instead, we should either + // be using the table type (TTable) or introduce a new vtuple type. result.typ = types.TTuple{result.typ} }