Skip to content

Commit

Permalink
Merge pull request #21114 from petermattis/pmattis/subquery
Browse files Browse the repository at this point in the history
sql: simplify the subquery typing rules
  • Loading branch information
petermattis authored Dec 29, 2017
2 parents 1d1e1b0 + 7c0651e commit 283bcdf
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 41 deletions.
35 changes: 33 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/subquery
Original file line number Diff line number Diff line change
Expand Up @@ -60,26 +60,57 @@ SELECT (SELECT 1) IN (1)
----
true

# NB: Cockroach has different behavior from Postgres on a few esoteric
# subqueries. The Cockroach behavior seems more sensical and
# supporting the specific Postgres behavior appears onerous. Fingers
# crossed this doesn't bite us down the road.

# Postgres cannot handle this query (but MySQL can), even though it
# seems sensical:
# ERROR: subquery must return only one column
# LINE 1: select (select 1, 2) IN (select 1, 2);
# ^
query B
SELECT (SELECT 1, 2) IN (SELECT 1, 2)
----
true

# Postgres cannot handle this query, even though it seems sensical:
# ERROR: subquery must return only one column
# LINE 1: select (select 1, 2) IN ((1, 2));
# ^
query B
SELECT (SELECT 1, 2) IN ((1, 2))
----
true

# Postgres cannot handle this query, even though it seems sensical:
# ERROR: subquery has too many columns
# LINE 1: select (select (1, 2)) IN (select 1, 2);
# ^
query B
SELECT (SELECT (1, 2)) IN (SELECT 1, 2)
----
true

query error unsupported comparison operator: <tuple{int, int}> IN <tuple{tuple{tuple{int, int}}}>
query B
SELECT (SELECT (1, 2)) IN ((1, 2))
----
true

# Postgres cannot handle this query, even though it seems sensical:
# ERROR: subquery must return only one column
# LINE 1: select (select 1, 2) in (select (1, 2));
# ^
query B
SELECT (SELECT 1, 2) IN (SELECT (1, 2))
----
true

query error unsupported comparison operator: <tuple{int, int}> IN <tuple{tuple{tuple{int, int}}}>
query B
SELECT (SELECT (1, 2)) IN (SELECT (1, 2))
----
true

query B
SELECT 1 = ANY(SELECT 1)
Expand Down
6 changes: 4 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/typing
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,15 @@ true
statement error unsupported comparison operator: <int> IN <tuple{string}>
SELECT 1 IN (SELECT 'a')

statement error unsupported comparison operator: <int> IN <tuple{tuple{tuple{int, int}}}>
statement error unsupported comparison operator: <int> IN <tuple{tuple{int, int}}>
SELECT 1 IN (SELECT (1, 2))

query B
SELECT (1, 2) IN (SELECT 1, 2)
----
true

statement error unsupported comparison operator: <tuple{int, int}> IN <tuple{tuple{tuple{int, int}}}>
query B
SELECT (1, 2) IN (SELECT (1, 2))
----
true
94 changes: 57 additions & 37 deletions pkg/sql/subquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -378,44 +378,59 @@ func (v *subqueryVisitor) replaceSubquery(

result := &subquery{subquery: sub, plan: plan}

wrap := true
if len(cols) == 1 {
if !multiRow {
// The subquery has only a single column and is not in a multi-row
// context, we don't want to wrap the type in a tuple. For example:
//
// SELECT (SELECT 1)
//
// and
//
// SELECT (SELECT (1, 2))
//
// This will result in the types "int" and "tuple{int,int}" respectively.
wrap = false
} else if !types.FamTuple.FamilyEqual(cols[0].Typ) {
// The subquery has only a single column and is in a multi-row
// context. We only wrap if the type of the result column is not a
// tuple. For example:
//
// SELECT 1 IN (SELECT 1)
//
// The type of the subquery will be "tuple{int}". Now consider this
// invalid query:
//
// SELECT (1, 2) IN (SELECT (1, 2))
//
// We want the type of the subquery to be "tuple{tuple{tuple{int,int}}}"
// in order to distinguish it from this semantically valid query:
//
// SELECT (1, 2) IN (SELECT 1, 2)
//
// In this query, the subquery has the type "tuple{tuple{int,int}}"
// making the IN expression valid.
wrap = false
}
}
// The typing for subqueries is complex, but regular.
//
// * If the subquery is used in a single-row context:
//
// - If the subquery returns a single column with type "U", the type of the
// subquery is the type of the column "U". For example:
//
// SELECT 1 = (SELECT 1)
//
// The type of the subquery is "int".
//
// - If the subquery returns multiple columns, the type of the subquery is
// "tuple{C}" where "C" expands to all of the types of the columns of the
// subquery. For example:
//
// SELECT (1, 'a') = (SELECT 1, 'a')
//
// The type of the subquery is "tuple{int,string}"
//
// * If the subquery is used in a multi-row context:
//
// - If the subquery returns a single column with type "U", the type of the
// subquery is the singleton tuple of type "U": "tuple{U}". For example:
//
// SELECT 1 IN (SELECT 1)
//
// The type of the subquery's columns is "int" and the type of the
// subquery is "tuple{int}".
//
// - If the subquery returns multiple columns, the type of the subquery is
// "tuple{tuple{C}}" where "C expands to all of the types of the columns
// of the subquery. For example:
//
// SELECT (1, 'a') IN (SELECT 1, 'a')
//
// The types of the subquery's columns are "int" and "string". These are
// wrapped into "tuple{int,string}" to form the row type. And these are
// wrapped again to form the subquery type "tuple{tuple{int,string}}".
//
// Note that these rules produce a somewhat surprising equivalence:
//
// SELECT (SELECT 1, 2) = (SELECT (1, 2))
//
// A subquery which returns a single column tuple is equivalent to a subquery
// which returns the elements of the tuple as individual columns. While
// surprising, this is necessary for regularity and in order to handle:
//
// SELECT 1 IN (SELECT 1)
//
// Without that auto-unwrapping of single-column subqueries, this query would
// type check as "<int> IN <tuple{tuple{int}}>" which would fail.

if !wrap {
if len(cols) == 1 {
result.typ = cols[0].Typ
} else {
colTypes := make(types.TTuple, len(cols))
Expand All @@ -431,6 +446,11 @@ func (v *subqueryVisitor) replaceSubquery(
// SELECT 1 IN (SELECT * FROM t)
//
// Wrap the type in a tuple.
//
// TODO(peter): Using a tuple type to represent a multi-row subquery works
// with the current type checking code, but seems semantically incorrect. A
// tuple represents a fixed number of elements. Instead, we should either
// be using the table type (TTable) or introduce a new vtuple type.
result.typ = types.TTuple{result.typ}
}

Expand Down

0 comments on commit 283bcdf

Please sign in to comment.