Skip to content

Commit

Permalink
[#17869] YSQL: fix incorrect optimization on NULL scans
Browse files Browse the repository at this point in the history
Summary:
There is an optimization to avoid performing a scan when the condition
is unsatisfiable.  The logic for determining whether a condition is
unsatisfiable has flaws:

- It has an inverted condition: rather than looking for a row filter
  whose first key is null, it looks for a row filter where any key but
  the first is null.
- It reads data out-of-bounds from an array: it tries to read keys[-1].

Fix both issues.

To test, add a debug log when an unsatisfiable condition is found.  Also
add a psql variable YB_DISABLE_ERROR_PREFIX to avoid adding a prefix to
client messages when a file is being read because that prefix includes a
line number which can easily change over the course of edits to the
file.  Add test cases to exercise the unsatisfiable conditions.  Also
add DIST to the EXPLAINs for further assurance that RPCs are not sent.
Jira: DB-6953

Test Plan:
Expect the following to fail on only the yb_index_scan test due to
issue #17750.

    ./yb_build.sh fastdebug --gcc11 --java-test TestPgRegressIndex

Close: #17869

Reviewers: tnayak

Reviewed By: tnayak

Subscribers: kramanathan, yql

Differential Revision: https://phorge.dev.yugabyte.com/D26352
  • Loading branch information
jaki committed Jun 22, 2023
1 parent 99e0201 commit 05ea20d
Show file tree
Hide file tree
Showing 7 changed files with 280 additions and 76 deletions.
26 changes: 16 additions & 10 deletions src/postgres/src/backend/access/yb_access/yb_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -757,22 +757,28 @@ YbGetLengthOfKey(ScanKey *key_ptr)
}

/*
* Check whether the conditions lead to empty result regardless of the values
* in the index because of always FALSE or UNKNOWN conditions.
* Return true if the combined key conditions are unsatisfiable.
* Return whether the given conditions are unsatisfiable regardless of the
* values in the index because of always FALSE or UNKNOWN conditions.
*/
static bool
YbIsEmptyResultCondition(int nkeys, ScanKey keys[])
YbIsUnsatisfiableCondition(int nkeys, ScanKey keys[])
{
for (int i = 0; i < nkeys; i++)
for (int i = 0; i < nkeys; ++i)
{
ScanKey key = keys[i];

if (!((key->sk_flags & SK_ROW_MEMBER) && YbIsRowHeader(keys[i - 1])) ||
key->sk_strategy == BTEqualStrategyNumber)
/*
* Look for two cases:
* - = null
* - row(a, b, c) op row(null, e, f)
*/
if ((key->sk_strategy == BTEqualStrategyNumber ||
(i > 0 && YbIsRowHeader(keys[i - 1]) &&
key->sk_flags & SK_ROW_MEMBER)) &&
YbIsNeverTrueNullCond(key))
{
if (YbIsNeverTrueNullCond(key))
return true;
elog(DEBUG1, "skipping a scan due to unsatisfiable condition");
return true;
}
}
return false;
Expand Down Expand Up @@ -2083,7 +2089,7 @@ ybcBeginScan(Relation relation,
ybcSetupScanPlan(xs_want_itup, ybScan, &scan_plan);
ybcSetupScanKeys(ybScan, &scan_plan);

if (!YbIsEmptyResultCondition(ybScan->nkeys, ybScan->keys) &&
if (!YbIsUnsatisfiableCondition(ybScan->nkeys, ybScan->keys) &&
YbBindScanKeys(ybScan, &scan_plan) &&
YbBindHashKeys(ybScan))
{
Expand Down
2 changes: 1 addition & 1 deletion src/postgres/src/bin/psql/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ psql_error(const char *fmt,...)
if (pset.queryFout && pset.queryFout != stdout)
fflush(pset.queryFout);

if (pset.inputfile)
if (pset.inputfile && !GetVariable(pset.vars, "YB_DISABLE_ERROR_PREFIX"))
fprintf(stderr, "%s:%s:" UINT64_FORMAT ": ", pset.progname, pset.inputfile, pset.lineno);
va_start(ap, fmt);
vfprintf(stderr, _(fmt), ap);
Expand Down
145 changes: 112 additions & 33 deletions src/postgres/src/test/regress/expected/yb_index_scan_null_asc.out
Original file line number Diff line number Diff line change
Expand Up @@ -8,134 +8,213 @@ DROP INDEX IF EXISTS i_nulltest_ba;
CREATE INDEX i_nulltest_ba ON nulltest (b ASC, a ASC);
\i sql/yb_index_scan_null_queries.sql
-- Queries for the null scan key tests
SET client_min_messages = DEBUG1;
\set YB_DISABLE_ERROR_PREFIX on
-- Should return empty results (actual rows=0)
-- The plans should not show any "Recheck"
EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */
SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a = t2.x;
QUERY PLAN
DEBUG: skipping a scan due to unsatisfiable condition
QUERY PLAN
----------------------------------------------------------------------------
Nested Loop (actual rows=0 loops=1)
-> Seq Scan on nulltest2 t2 (actual rows=1 loops=1)
Storage Table Read Requests: 3
-> Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1)
Index Cond: (a = t2.x)
(4 rows)
(5 rows)

EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */
SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a <= t2.x;
QUERY PLAN
QUERY PLAN
----------------------------------------------------------------------------
Nested Loop (actual rows=0 loops=1)
-> Seq Scan on nulltest2 t2 (actual rows=1 loops=1)
Storage Table Read Requests: 3
-> Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1)
Index Cond: (a <= t2.x)
(4 rows)
Storage Index Read Requests: 1
(6 rows)

EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */
SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a BETWEEN t2.x AND t2.x + 2;
QUERY PLAN
QUERY PLAN
----------------------------------------------------------------------------
Nested Loop (actual rows=0 loops=1)
-> Seq Scan on nulltest2 t2 (actual rows=1 loops=1)
Storage Table Read Requests: 3
-> Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1)
Index Cond: ((a >= t2.x) AND (a <= (t2.x + 2)))
(4 rows)
Storage Index Read Requests: 1
(6 rows)

EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */
SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON (t1.a, t1.b) = (t2.x, t2.y);
QUERY PLAN
DEBUG: skipping a scan due to unsatisfiable condition
QUERY PLAN
-----------------------------------------------------------------------------
Nested Loop (actual rows=0 loops=1)
-> Seq Scan on nulltest2 t2 (actual rows=1 loops=1)
Storage Table Read Requests: 3
-> Index Scan using i_nulltest_ba on nulltest t1 (actual rows=0 loops=1)
Index Cond: ((b = t2.y) AND (a = t2.x))
(4 rows)
(5 rows)

EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */
SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON (t1.a, t1.b) <= (t2.x, t2.y);
QUERY PLAN
QUERY PLAN
----------------------------------------------------------------------------
Nested Loop (actual rows=0 loops=1)
-> Seq Scan on nulltest2 t2 (actual rows=1 loops=1)
Storage Table Read Requests: 3
-> Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1)
Index Cond: (a <= t2.x)
Filter: (ROW(a, b) <= ROW(t2.x, t2.y))
(5 rows)
Storage Index Read Requests: 1
(7 rows)

EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1) */
SELECT * FROM nulltest t1 WHERE (a, b) <= (null, 1);
QUERY PLAN
QUERY PLAN
----------------------------------------------------------------------
Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1)
Index Cond: (a <= NULL::integer)
Filter: (ROW(a, b) <= ROW(NULL::integer, 1))
(3 rows)
Storage Index Read Requests: 1
(4 rows)

EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1 i_nulltest_ba) */
SELECT * FROM nulltest t1 WHERE (a, b) <= (null, 1);
DEBUG: skipping a scan due to unsatisfiable condition
QUERY PLAN
-----------------------------------------------------------------------
Index Scan using i_nulltest_ba on nulltest t1 (actual rows=0 loops=1)
Index Cond: (ROW(a, b) <= ROW(NULL::integer, 1))
(2 rows)

EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1) */
SELECT a FROM nulltest t1 WHERE a IN (null, null);
QUERY PLAN
QUERY PLAN
----------------------------------------------------------------------
Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1)
Index Cond: (a = ANY ('{NULL,NULL}'::integer[]))
(2 rows)

EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1 i_nulltest_ba) */
SELECT a FROM nulltest t1 WHERE a IN (null, null);
QUERY PLAN
-----------------------------------------------------------------------
Index Scan using i_nulltest_ba on nulltest t1 (actual rows=0 loops=1)
Index Cond: (a = ANY ('{NULL,NULL}'::integer[]))
(2 rows)

-- Should return 1s
/*+ IndexScan(t1) */
SELECT a FROM nulltest t1 WHERE a IN (null, 1);
a
a
---
1
1
(2 rows)

EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1) */
SELECT a FROM nulltest t1 WHERE a IN (null, 1);
QUERY PLAN
QUERY PLAN
----------------------------------------------------------------------
Index Scan using i_nulltest_a on nulltest t1 (actual rows=2 loops=1)
Index Cond: (a = ANY ('{NULL,1}'::integer[]))
Storage Table Read Requests: 1
Storage Index Read Requests: 1
(4 rows)

/*+ IndexScan(t1 i_nulltest_ba) */
SELECT a FROM nulltest t1 WHERE a IN (null, 1);
a
---
1
1
(2 rows)

EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1 i_nulltest_ba) */
SELECT a FROM nulltest t1 WHERE a IN (null, 1);
QUERY PLAN
-----------------------------------------------------------------------
Index Scan using i_nulltest_ba on nulltest t1 (actual rows=2 loops=1)
Index Cond: (a = ANY ('{NULL,1}'::integer[]))
Storage Table Read Requests: 1
Storage Index Read Requests: 1
(4 rows)

/*+ IndexScan(t1) */
SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null);
a
a
---
1
1
(2 rows)

EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1) */
SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null);
QUERY PLAN
QUERY PLAN
----------------------------------------------------------------------
Index Scan using i_nulltest_a on nulltest t1 (actual rows=2 loops=1)
Index Cond: (a <= 2)
Filter: (ROW(a, b) <= ROW(2, NULL::integer))
(3 rows)
Storage Table Read Requests: 1
Storage Index Read Requests: 1
(5 rows)

/*+ IndexScan(t1 i_nulltest_ba) */
SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null);
a
---
1
1
(2 rows)

EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1 i_nulltest_ba) */
SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null);
QUERY PLAN
-----------------------------------------------------------------------
Index Scan using i_nulltest_ba on nulltest t1 (actual rows=2 loops=1)
Index Cond: (ROW(a, b) <= ROW(2, NULL::integer))
Rows Removed by Index Recheck: 2
Storage Table Read Requests: 1
Storage Index Read Requests: 1
(5 rows)

-- Should return nulls
/*+ IndexScan(t1) */
SELECT a FROM nulltest t1 WHERE a IS NULL;
a
a
---


(2 rows)

EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF)
/*+ IndexScan(t1) */
SELECT a FROM nulltest t1 WHERE a IS NULL;
QUERY PLAN
QUERY PLAN
----------------------------------------------------------------------
Index Scan using i_nulltest_a on nulltest t1 (actual rows=2 loops=1)
Index Cond: (a IS NULL)
(2 rows)
Storage Table Read Requests: 1
Storage Index Read Requests: 1
(4 rows)

RESET client_min_messages;
\unset YB_DISABLE_ERROR_PREFIX
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
-- Create tables for the null scan key tests
CREATE TABLE nulltest (a int, b int);
--
-- As of 2023-06-21, the tables will default to 3 tablets, but in case those
-- defaults change, explicitly set the numbers here. The number of tablets
-- affects the number of requests shown in EXPLAIN DIST.
CREATE TABLE nulltest (a int, b int) SPLIT INTO 3 TABLETS;
INSERT INTO nulltest VALUES (null, null), (null, 1), (1, null), (1, 1);
CREATE TABLE nulltest2 (x int, y int);
CREATE TABLE nulltest2 (x int, y int) SPLIT INTO 3 TABLETS;
INSERT INTO nulltest2 VALUES (null, null);
Loading

0 comments on commit 05ea20d

Please sign in to comment.