From 19b14805f7ab0dd22c691f2c5a8f12198e677975 Mon Sep 17 00:00:00 2001 From: Rebecca Taft Date: Tue, 18 Aug 2020 20:48:14 -0500 Subject: [PATCH] opt: add index acceleration support for ~ and && bounding box operators This commit adds index acceleration support for the bounding box comparison operators, ~ and &&. It maps ~ to Covers and && to Intersects. Release note (performance improvement): The ~ and && geospatial bounding box operations can now benefit from index acceleration if one of the operands is an indexed geometry column. --- .../logic_test/inverted_filter_geospatial | 54 ++++ .../inverted_filter_geospatial_dist | 28 ++ .../inverted_filter_geospatial_explain_local | 72 +++++ .../logic_test/inverted_join_geospatial | 68 +++++ .../logic_test/inverted_join_geospatial_dist | 22 ++ .../inverted_join_geospatial_explain | 76 +++++ pkg/sql/opt/invertedidx/geo.go | 262 +++++++++++------- pkg/sql/opt/invertedidx/geo_test.go | 126 ++++++++- pkg/sql/opt/operator.go | 7 +- pkg/sql/opt/ops/scalar.opt | 16 ++ pkg/sql/opt/optbuilder/scalar.go | 22 +- pkg/sql/opt/optbuilder/testdata/scalar | 94 +++++++ pkg/sql/opt/xform/custom_funcs.go | 2 +- pkg/sql/opt/xform/testdata/rules/join | 101 +++++++ pkg/sql/opt/xform/testdata/rules/select | 114 ++++++++ 15 files changed, 957 insertions(+), 107 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial b/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial index 6bd2dcb43a8d..cfa6bd388616 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial +++ b/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial @@ -45,6 +45,60 @@ SELECT k FROM geo_table WHERE ST_DWithin('POINT(2.5 2.5)'::geometry, geom, 1) OR 3 6 +# Bounding box operations. +statement ok +SET CLUSTER SETTING sql.spatial.experimental_box2d_comparison_operators.enabled = on + +query I +SELECT k FROM geo_table WHERE 'POINT(3.0 3.0)'::geometry && geom ORDER BY k +---- +3 +6 + +query I +SELECT k FROM geo_table WHERE 'POINT(3.0 3.0)'::geometry::box2d && geom ORDER BY k +---- +3 +6 + +query I +SELECT k FROM geo_table WHERE ST_Covers('LINESTRING(1.0 1.0, 5.0 5.0)'::geometry, geom) ORDER BY k +---- +1 +2 +3 +4 + +# Note that the result of the `~` bounding box operation includes an extra +# result not present in the previous result of ST_Covers. +query I +SELECT k FROM geo_table WHERE 'LINESTRING(1.0 1.0, 5.0 5.0)'::geometry ~ geom ORDER BY k +---- +1 +2 +3 +4 +6 + +query I +SELECT k FROM geo_table WHERE 'LINESTRING(1.0 1.0, 5.0 5.0)'::geometry::box2d ~ geom ORDER BY k +---- +1 +2 +3 +4 +6 + +query I +SELECT k FROM geo_table WHERE geom ~ 'LINESTRING(1.0 1.0, 5.0 5.0)'::geometry ORDER BY k +---- +6 + +query I +SELECT k FROM geo_table WHERE geom ~ 'LINESTRING(1.0 1.0, 5.0 5.0)'::geometry::box2d ORDER BY k +---- +6 + statement ok CREATE TABLE geo_table2( k int, diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial_dist b/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial_dist index 83bf1424e826..a18e2bd4b696 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial_dist +++ b/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial_dist @@ -143,3 +143,31 @@ SELECT url FROM [EXPLAIN (DISTSQL) SELECT k FROM geo_table WHERE ST_CoveredBy('MULTIPOINT((2.2 2.2), (3.0 3.0))'::geometry, geom) ORDER BY k] ---- https://cockroachdb.github.io/distsqlplan/decode.html#eJyUU99P2zAQft9fcboXWs1rbScU6qeyEbZMhbK004ZIhEJzYhEhzmwXgVD_9ykNA1rWjvohyf347vvuLn5A-7tAheNgGHyawMwUcBSNjuE8-Hk6PAhPoHUYjifjb8M2PKZcNwlXpC9celkQ_PgSRAFYdzHVt2Qou7xv7Rx_H07C01F4Mmm1ZEeC7Mg2g5bX4eB1eLu9o9TnYHQcTKIzVpe6acMoOgwi-HgG1wkyLHVGJ-kNWVTnKDBhWBk9JWu1qV0Pi4Qwu0PFGeZlNXO1O2E41YZQPaDLXUGocFJrjCjNyHQ5MszIpXmxKPvUwqBWcJGXGd0hw3GVllZBV4hd2Zdil_s93tv3e_293od_OPcgLTPw90G7X2QsJnOGeuaeFVmXXhEqMWdvVx2Wt2QcZUd54ciQ6Ypl6X_jwV1lQJcwEApsrRusS41TMcaxt7cbx1zyOOb8fw8EKrMtUSJGWOmd4WjmFAzE2inIbabwVefl4-rkutVVJr9Jzf0zNRvItezeNuxjbRyZrrfMPBDvkWGzF7Xy13PBfd4c-fgWXDQf_YP-0-HCf2U_Zy6dff-VvaNeXp6BbL9h7P42jUdkK11aWmp8XWU-TxhSdkXNtbR6ZqZ0avR0QdOYowVu4cjIuiYqGiMsm1At8CVYbATLzWC5EextBnsbwf4KOJm_-xMAAP__NiSaTw== + +# Bounding box operations. +statement ok +SET CLUSTER SETTING sql.spatial.experimental_box2d_comparison_operators.enabled = on + +query T +SELECT url FROM [EXPLAIN (DISTSQL) +SELECT k FROM geo_table WHERE geom && 'POINT(3.0 3.0)'::geometry] +---- +https://cockroachdb.github.io/distsqlplan/decode.html#eJyUk29v2j4Qx5__XsXpntBKbrET_rR-xE9bumWiwABpmxpUZeTEogU7s52JCfHepyTdWuhISx5Yuj-fy33P5y3aHxlKnAXD4M0cCpPBzXR8C3fB58nw_3AEZ2_D2Xz2cXgODynf64QV6XsXf80IPr0PpkFpryEqOPd69QmtyTgczc_8Sw7-JT9vSfkuGN8G8-mXBTJUOqFRvCaL8g4FMvRwwTA3eknWalO6t1VSmGxQcoapygtXuhcMl9oQyi261GWEEkf6QudtHxkm5OI0q9J2DHXhHiHr4hWh7OzYk8KiufC8VDilOCHT5nvl8e8ABqX0-1QltEGGszxWVkJbiK537Yku7_R476rTu-73Lv7h7EOsEvA5aPeNjMVjXYtTug7VTzKOkps0c2TItMV-63_iwSY3oBUMhARb9g3WxcbJCKPI73ejiHs8ijh_6UAglZxIiQjhQDvDceEkDMTRKXinTOGDTtXD1XnHri436To2v5BhPSsJA-9gi7nggj__rjrP7JZ8XPFXiPH3xLyw4VOyuVaWXrXifLdgSMmK6ldkdWGWNDF6Wf2mNscVVzkSsq6OdmojVHWobPApLBphrxn2GmG_GfYbYX4AL3b__Q4AAP__ZFp--A== + +query T +SELECT url FROM [EXPLAIN (DISTSQL) +SELECT k FROM geo_table WHERE 'POINT(3.0 3.0)'::geometry::box2d && geom] +---- +https://cockroachdb.github.io/distsqlplan/decode.html#eJyUk99v2jAQx9_3V5zuhVZyhe1QaP3EuqYaE4MOkIbUoCojJxYt2JntTEyI_31K0m2lLWnJg6X78bl873zeovuZocJpOAw_zKCwGdxMxp_hLpzfDt8PRnByPZjOpl-Gp_CQ8qNOWJG59_G3jODrx3ASQosLLvjz76LzzG4pdTWey2uICs5ltz7LgusFMtQmoVG8JofqDgUylLhgmFuzJOeMLd3bKmmQbFBxhqnOC1-6FwyXxhKqLfrUZ4QKR-bM5O0AGSbk4zSr0nYMTeH_Q87HK0LV2bFHhUVz4VnZ-oTihGyb75XHf5Pply3dpzqhDTKc5rF2CtpCnMtLKc55p8u7F53uZa979oKzB7FOIOBg_HeyDg-pFseoHuhfZD0lN2nmyZJti33pf-PhJrdgNPSFAlfqBudj61WEURT0zqOISx5FnL92IJBOjqREhPCkd4bjwivoi4NTkMdM4ZNJ9cPVyUNXl9t0HdvfyLCelYLW1Xh-EkDAAghOW-rFHe7LN2gN9rS-ssATcrnRjt60wXy3YEjJiupH4kxhl3RrzbL6TW2OK65yJOR8He3UxkDXoVLgY1g0wrIZlo1w0AwHjTB_Ai927_4EAAD__0vdfDM= + +query T +SELECT url FROM [EXPLAIN (DISTSQL) +SELECT k FROM geo_table WHERE 'LINESTRING(1.0 1.0, 5.0 5.0)'::geometry ~ geom] +---- +https://cockroachdb.github.io/distsqlplan/decode.html#eJyUk99v2jAQx9_3V5zuhVZyi50EWP3EtIUuE4UuIG1Tg6qMnLpowc5sMzEh9rdPIe0GaKTFUhLdj8_lvmd7jfZHgRIn4TB8O4WlKWAQj2_gLvx8O3wTjeDsXTSZTj4Oz-Ex5Xud8ED63qVfC4JP78M4hNYwGoWTaRyNrs_EJQdxyRl0Lnn1nLekvA7HN-E0_gK_K3QxQ4ZKZzRKF2RR3qFAhh7OGJZGz8labSr3epsUZSuUnGGuyqWr3DOGc20I5Rpd7gpCiSN9ocu2jwwzcmlebNM2DPXS_YOsSx8IZbBhO4VFc-FpJTKmNCPT5nvl8e8M-pWk-1xltEKGkzJVVkJbiI535YkOD7q8-zroXvW6F_9x9iBVGfg90O4bGYvHuhandB2pn2QcZYO8cGTItMV-60_xcFUa0Ar6QoKt-gbrUuNkgkni9zpJwj2eJJw_90IglZ1IiQThQDvD8dJJ6IujU_BOmcIHnavHrfOObV1p8kVqfiHDelYSWlxwj9fr6buzBtwfNNkiCPih3ZJ7N6DvvUCpv6f0meMfky21svSi8883M4aUPVB9xaxemjndGj3f_qY2x1tu68jIujoa1Eak6lDV4C4sGmGvGfYaYb8Z9hthfgDPNq_-BAAA___AfYR8 + +query T +SELECT url FROM [EXPLAIN (DISTSQL) +SELECT k FROM geo_table WHERE geom ~ 'LINESTRING(1.0 1.0, 5.0 5.0)'::geometry::box2d] +---- +https://cockroachdb.github.io/distsqlplan/decode.html#eJyUkt9v2jAQx9_3V5zupa3kCdv8Gn5iXYPGxEoHSENqoirDJxYt2JltJibE_vYpCd1aNFjxA-h7d1_nc-fbov-eo8JpNIrezWDtchhMxh_hPprfjd4Ob-HyZjidTT-NrmBf8q0uWJJ9COmXnODz-2gSlXoFv-CCCy55fR7_n5wBbw5OadFq8UN9odT1eC5vEmRorKbbdEUe1T0KTBgWzi7Ie-vK0LYqGOoNKs4wM8U6lOGE4cI6QrXFkIWcUOGsRJ9Qqsk1ODLUFNIsr67901m_7OkhM5o2yHBapMYraAjRlj0p2rzV4Z03rU6v23n9j2AXUqOhJ8GGr-Q8JjuGdh3-EvmQLgmV2LGXUw_ND3KB9CDLAzlyDfEc_TEfbQoH1kBfKPAlN_iQuqBijONmtx3HXPI45vx_Pwhk9JkuESMc9M5wvA4K-uLoFOQ5U_hgM7N_Onns6QqXrVL3ExnWs1LQl-V6Xo_nlwIEa0P76kLtN-sFgM1zACfkC2s8PYM7djPfJQxJL6leYG_XbkF3zi6qz9RyXPmqgCYf6qyoxdDUqRLwqVmcNMvTZnnS3DwwJ7tXvwMAAP__dHFMpA== diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial_explain_local b/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial_explain_local index c25759aa79f1..47fe7c6a846e 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial_explain_local +++ b/pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial_explain_local @@ -57,3 +57,75 @@ filter · · · missing stats · · table geo_table2@geom_index · spans 20 spans + +# Bounding box operations. +statement ok +SET CLUSTER SETTING sql.spatial.experimental_box2d_comparison_operators.enabled = on + +query TTT +EXPLAIN SELECT k FROM geo_table2 WHERE geom && 'POINT(3.0 3.0)'::geometry +---- +· distribution local +· vectorized false +filter · · + │ filter geom && '010100000000000000000008400000000000000840' + └── index join · · + │ table geo_table2@primary + └── inverted filter · · + │ inverted column geom_inverted_key + │ num spans 31 + └── scan · · +· missing stats · +· table geo_table2@geom_index +· spans 31 spans + +query TTT +EXPLAIN SELECT k FROM geo_table2 WHERE 'POINT(3.0 3.0)'::geometry::box2d && geom +---- +· distribution local +· vectorized false +filter · · + │ filter 'BOX(3 3,3 3)' && geom + └── index join · · + │ table geo_table2@primary + └── inverted filter · · + │ inverted column geom_inverted_key + │ num spans 31 + └── scan · · +· missing stats · +· table geo_table2@geom_index +· spans 31 spans + +query TTT +EXPLAIN SELECT k FROM geo_table2 WHERE 'LINESTRING(1.0 1.0, 5.0 5.0)'::geometry ~ geom +---- +· distribution local +· vectorized false +filter · · + │ filter '010200000002000000000000000000F03F000000000000F03F00000000000014400000000000001440' ~ geom + └── index join · · + │ table geo_table2@primary + └── inverted filter · · + │ inverted column geom_inverted_key + │ num spans 38 + └── scan · · +· missing stats · +· table geo_table2@geom_index +· spans 38 spans + +query TTT +EXPLAIN SELECT k FROM geo_table2 WHERE geom ~ 'LINESTRING(1.0 1.0, 5.0 5.0)'::geometry::box2d +---- +· distribution local +· vectorized false +filter · · + │ filter geom ~ 'BOX(1 1,5 5)' + └── index join · · + │ table geo_table2@primary + └── inverted filter · · + │ inverted column geom_inverted_key + │ num spans 93 + └── scan · · +· missing stats · +· table geo_table2@geom_index +· spans 93 spans diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial index fc7b99ef3943..d75878cd1dcd 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial +++ b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial @@ -198,3 +198,71 @@ ORDER BY rk 12 14 15 + +# Bounding box operations. +statement ok +SET CLUSTER SETTING sql.spatial.experimental_box2d_comparison_operators.enabled = on + +query II +SELECT lk, rk FROM ltable JOIN rtable@geom_index ON ltable.geom1 ~ rtable.geom +ORDER BY lk, rk +---- +1 13 + +query II +SELECT lk, rk FROM ltable JOIN rtable@geom_index ON rtable.geom ~ ltable.geom1 +ORDER BY lk, rk +---- +1 13 +1 16 +2 14 +2 16 +3 12 +3 16 +5 12 +5 16 + +query II +SELECT lk, rk FROM ltable JOIN rtable@geom_index ON rtable.geom && ltable.geom1 +ORDER BY lk, rk +---- +1 13 +1 16 +2 14 +2 16 +3 12 +3 16 +5 12 +5 16 + +query II +SELECT lk, rk FROM ltable JOIN rtable@geom_index ON ltable.geom1::box2d ~ rtable.geom +ORDER BY lk, rk +---- +1 13 + +query II +SELECT lk, rk FROM ltable JOIN rtable@geom_index ON rtable.geom ~ ltable.geom1::box2d +ORDER BY lk, rk +---- +1 13 +1 16 +2 14 +2 16 +3 12 +3 16 +5 12 +5 16 + +query II +SELECT lk, rk FROM ltable JOIN rtable@geom_index ON ltable.geom1::box2d && rtable.geom +ORDER BY lk, rk +---- +1 13 +1 16 +2 14 +2 16 +3 12 +3 16 +5 12 +5 16 diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist index 5d737daf16d7..6e5813f846f7 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist +++ b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist @@ -92,3 +92,25 @@ SELECT url FROM [EXPLAIN (DISTSQL) SELECT lk FROM ltable WHERE NOT EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))] ---- https://cockroachdb.github.io/distsqlplan/decode.html#eJy0U9FumzAUfd9XWPcpmZwRTLIHP1FtVKXKoAtIq1ShicFVhkYws420Ksq_T-BoKWh0dFEfr-8595xjXx9A_SyBQ-RtvA8xaWRJrrfhJ_Lg3d9trvyAzD76URx93szJCVL-MIhSp99KJF9uvK1HgjAm3n2LJLMT7q2Byacwpb8WlUapMNNqZia826HYM3oCdtV8ngCFSuQYpHtUwB_ABgoMKDiQUKilyFApIdvWoQP6-S_gSwpFVTe6PU4oZEIi8APoQpcIHOJWYItpjtJaAoUcdVqU3Xhjxa1lsU_lI1CI6rRSnCysVjRsNCeuTV0HkiMF0eiziNLpDoHbRzrdSCAWorbWPQtjg1e9wfb0hPa0hBZbWM7_ZGTTrbCJVpzFKxtx-kbk2Kv_scFGPbBRD2fpphIyR4n58KX_DflLkJtUfb8VRYXSWvVzxI81crLxrmNyFcQ-uQ39oI0QDD6cyyhxnfn5kkfTOS9Z5y2qWlQKJ-3zso2G-Q7NVSnRyAzvpMg6GVOGHa87yFFp012bwq-6VrcCT8n2s-T3PfJySGaXKDuXkFcvILMhef0s2R5kTo5vfgcAAP__M3_rTA== + +# Bounding box operations. +statement ok +SET CLUSTER SETTING sql.spatial.experimental_box2d_comparison_operators.enabled = on + +query T +SELECT url FROM [EXPLAIN (DISTSQL) +SELECT lk, rk FROM ltable JOIN rtable@geom_index ON ltable.geom1 ~ rtable.geom] +---- +https://cockroachdb.github.io/distsqlplan/decode.html#eJzElMGO2jwQgO__U1hz2pXMD0mAhZxStVTKiiZb4FBpFa1SPEIpwU5tZ0WF2GevkqBCKJjQSuUW2_N5Pnsm3oD6noIL09F49H5GcpmSj5PwE3kefXkav_MDcvfBn86mn8f3ZBeSLimRyyoq1fHXFMlj6AdElt_eAsXqJeEM1yQMdgH_F5MWedvFlMMIKHDBMIhXqMB9Bgso2EDBgYhCJsUclRKyWNqUgT5bg9uhkPAs18V0RGEuJIK7AZ3oFMGFWbH9BGOGst0BCgx1nKTl9pWJl8lkFcsfQGGaxVy5pNUukoa5dolnUc-GaEtB5HqfROl4geBaW9pcxOevKDWyR5FwlG2n7vLbVQH9RYzWmSRKv8zFK0p159mUeN37miL1nLOW9jWWhd3utvonDfe3NRZimWfkm0g4EdwlXnGkMCCeTd6I1zv0Oy_nnJXbO-VcSIYSWU0o2p7QD0RLZO3hUeDp1N1aaqt5G1nN2qhtt8oqX91IF1SOGql7o0a6YHnQSA__vpHs5tW0G1bTaf1JLS-IHNWyd6NaXrA8qOXgto_CCbkJqkxwhY3--U7xaCBbYPXCKJHLOT5JMS_TVMOw5MoJhkpXq1Y18Hm1VAgewpYRts2wbYSdGmwdw45Zu2NO3TXSPTPcM8J9M9z_m0M_GOGBOfPACA_N8PAq7Wj7388AAAD__5r_BqA= + +query T +SELECT url FROM [EXPLAIN (DISTSQL) +SELECT lk, rk FROM ltable JOIN rtable@geom_index ON rtable.geom ~ ltable.geom1] +---- +https://cockroachdb.github.io/distsqlplan/decode.html#eJzElFFv2jAQx9_3Kax7aiUzSAIU8pRpY1IqRjrgYVIVVSk5oYxgZ7ZTUSH62ScnbBAGJmzSeIvP9_f_57uL1yB_pODCZDAcfJySXKTk8zj4Qh4H3x6GH_wRufnkT6aTr8Nbsk1JF5SIRZmVqug5RXIf-CMiim9vjnz5lLAYVyT4FXyvg-Rtm16srBAoMB7jKFqiBPcRLKBgAwUHQgqZ4DOUkgu9tS4S_XgFbotCwrJc6XBIYcYFgrsGlagUwYWpPn-MUYyi2QIKMaooSYvjS28vE8kyEq9AYZJFTLqk0dSmQa5c4lnUsyHcUOC52plIFc0RXGtD64P47AWFwvieJwxF06my_FEqoL8Vg1UmiFRPM_6CAuPn1xvPpsRr31YoqeecBLUvAdWA24J1j0LuCjbkfJFn5DtPGOHMJZ6-VTAiXoe8Ea9SxdNwzkm4HVPOuIj15StA4eYI_og3eNbsHyQet25XrK36k2TVm6Sm3SgaffEsnUE5mKX29WbpDOjeLN39_1my6zfUrtlQp_E37TwDctDOzvXaeQZ0r5296z4NR-DGKDPOJNb681v66cB4juU7I3kuZvgg-KywKZdBoSsCMUpV7lrlwmfllgbcF1tGsW0W20axUxFbh2LHjN0yW7eN6o5Z3DGKu2Zx918ufWcU98zOPaO4bxb3L8ION-9-BgAA__9vVwoz + +query T +SELECT url FROM [EXPLAIN (DISTSQL) +SELECT lk, rk FROM ltable JOIN rtable@geom_index ON rtable.geom && ltable.geom1] +---- +https://cockroachdb.github.io/distsqlplan/decode.html#eJzMlM-O2jAQh-99CmtOu5Ip-QMs5JSqpVJWlGyBQ6VttErxCKUEO7Wdigrx7lUSWggFE9oDvSBi-_N89m_kDahvKXgwHY6Gb2cklyl5Pwk_kOfhp6fRm2BM7t4F09n04-ie7JakS0rkslqV6vhLiuQxDMZElv_9BYrVS8IZrkn4a_B1MUg-55bl9KrfHVlO2BFQ4ILhOF6hAu8ZbKDgAAUXIgqZFHNUSshialMuDNgaPItCwrNcF8MRhbmQCN4GdKJTBA9mxf4TjBnKtgUUGOo4Scvtq9p-JpNVLH8AhWkWc-WRVrsoGubaI75NfQeiLQWR630RpeMFgmdvaXORgH9HqZE9ioSjbLt1lz9uDehvYrjOJFH6JeEapcK5Vne-Q4nfua9pUt89a-pcY1oY7m6sd9Jyf2MjIZZ5Rr6KhBPBPeIXxwrHxO_WY_ZrN3re0z3rudfLuZAMJbKaW7Q9cZKxaImsPThaeLp0p1babt5VdrOuajutMvSr--qCylFfdW7YVxdMD_rq4aZ95TQP12kYrtv6m2gviBxF271htBdMD6Lt_zdPxgnPCapMcIWNXgSreFKQLbB6f5TI5RyfpJiXZarPsOTKAYZKV7N29RHwaqoQPIRtI-yYYccIuzXYPoZds7ZlLt0x0l0z3DXCPTPc-5dDPxjhvrly3wgPzPDgKu1o--pnAAAA___waBaG diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain index f9b1764ca876..9ede2ee238d1 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain +++ b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain @@ -186,3 +186,79 @@ project · · (lk) · estimated row count 1000 (missing stats) · · · table rtable@primary · · · spans FULL SCAN · · + +# Bounding box operations. +statement ok +SET CLUSTER SETTING sql.spatial.experimental_box2d_comparison_operators.enabled = on + +query TTTTT +EXPLAIN (VERBOSE) +SELECT lk, rk1, rk2 FROM ltable JOIN rtable@geom_index ON ltable.geom1 ~ rtable.geom +---- +· distribution local · · +· vectorized true · · +project · · (lk, rk1, rk2) · + │ estimated row count 326700 (missing stats) · · + └── lookup join (inner) · · (lk, geom1, rk1, rk2, geom) · + │ estimated row count 326700 (missing stats) · · + │ table rtable@primary · · + │ equality (rk1, rk2) = (rk1,rk2) · · + │ equality cols are key · · · + │ pred geom1 ~ geom · · + └── project · · (lk, geom1, rk1, rk2) · + │ estimated row count 10000 (missing stats) · · + └── inverted join · · (lk, geom1, rk1, rk2, geom_inverted_key) · + │ table rtable@geom_index · · + │ inverted expr st_covers(geom1, geom_inverted_key) · · + └── scan · · (lk, geom1) · +· estimated row count 1000 (missing stats) · · +· table ltable@primary · · +· spans FULL SCAN · · + +query TTTTT +EXPLAIN (VERBOSE) +SELECT lk, rk1, rk2 FROM ltable JOIN rtable@geom_index ON rtable.geom ~ ltable.geom1 +---- +· distribution local · · +· vectorized true · · +project · · (lk, rk1, rk2) · + │ estimated row count 326700 (missing stats) · · + └── lookup join (inner) · · (lk, geom1, rk1, rk2, geom) · + │ estimated row count 326700 (missing stats) · · + │ table rtable@primary · · + │ equality (rk1, rk2) = (rk1,rk2) · · + │ equality cols are key · · · + │ pred geom ~ geom1 · · + └── project · · (lk, geom1, rk1, rk2) · + │ estimated row count 10000 (missing stats) · · + └── inverted join · · (lk, geom1, rk1, rk2, geom_inverted_key) · + │ table rtable@geom_index · · + │ inverted expr st_coveredby(geom1, geom_inverted_key) · · + └── scan · · (lk, geom1) · +· estimated row count 1000 (missing stats) · · +· table ltable@primary · · +· spans FULL SCAN · · + +query TTTTT +EXPLAIN (VERBOSE) +SELECT lk, rk1, rk2 FROM ltable JOIN rtable@geom_index ON rtable.geom && ltable.geom1 +---- +· distribution local · · +· vectorized true · · +project · · (lk, rk1, rk2) · + │ estimated row count 326700 (missing stats) · · + └── lookup join (inner) · · (lk, geom1, rk1, rk2, geom) · + │ estimated row count 326700 (missing stats) · · + │ table rtable@primary · · + │ equality (rk1, rk2) = (rk1,rk2) · · + │ equality cols are key · · · + │ pred geom && geom1 · · + └── project · · (lk, geom1, rk1, rk2) · + │ estimated row count 10000 (missing stats) · · + └── inverted join · · (lk, geom1, rk1, rk2, geom_inverted_key) · + │ table rtable@geom_index · · + │ inverted expr st_intersects(geom1, geom_inverted_key) · · + └── scan · · (lk, geom1) · +· estimated row count 1000 (missing stats) · · +· table ltable@primary · · +· spans FULL SCAN · · diff --git a/pkg/sql/opt/invertedidx/geo.go b/pkg/sql/opt/invertedidx/geo.go index 5991a5cccfd9..865f89bc4cf7 100644 --- a/pkg/sql/opt/invertedidx/geo.go +++ b/pkg/sql/opt/invertedidx/geo.go @@ -31,12 +31,22 @@ import ( // This file contains functions for building geospatial inverted index scans // and joins that are used throughout the xform package. -// IsGeoIndexFunction returns true if the given function is a geospatial -// function that can be index-accelerated. -func IsGeoIndexFunction(fn opt.ScalarExpr) bool { - function := fn.(*memo.FunctionExpr) - _, ok := geoindex.RelationshipMap[function.Name] - return ok +// GetGeoIndexRelationship returns the corresponding geospatial relationship +// and ok=true if the given expression is either a geospatial function or +// bounding box comparison operator that can be index-accelerated. Otherwise +// returns ok=false. +func GetGeoIndexRelationship(expr opt.ScalarExpr) (_ geoindex.RelationshipType, ok bool) { + if function, ok := expr.(*memo.FunctionExpr); ok { + rel, ok := geoindex.RelationshipMap[function.Name] + return rel, ok + } + if _, ok := expr.(*memo.BBoxCoversExpr); ok { + return geoindex.Covers, true + } + if _, ok := expr.(*memo.BBoxIntersectsExpr); ok { + return geoindex.Intersects, true + } + return 0, false } // getSpanExprForGeoIndexFn is a function that returns a SpanExpression that @@ -103,7 +113,11 @@ func TryJoinGeoIndex( // derived, it is returned with ok=true. If no constraint can be derived, // then TryConstrainGeoIndex returns ok=false. func TryConstrainGeoIndex( - ctx context.Context, filters memo.FiltersExpr, tabID opt.TableID, index cat.Index, + ctx context.Context, + factory *norm.Factory, + filters memo.FiltersExpr, + tabID opt.TableID, + index cat.Index, ) (invertedConstraint *invertedexpr.SpanExpression, ok bool) { config := index.GeoConfig() var getSpanExpr getSpanExprForGeoIndexFn @@ -118,7 +132,7 @@ func TryConstrainGeoIndex( var invertedExpr invertedexpr.InvertedExpression for i := range filters { invertedExprLocal := constrainGeoIndex( - ctx, filters[i].Condition, tabID, index, getSpanExpr, + ctx, factory, filters[i].Condition, tabID, index, getSpanExpr, ) if invertedExpr == nil { invertedExpr = invertedExprLocal @@ -299,6 +313,7 @@ func joinGeoIndex( inputCols opt.ColSet, getSpanExpr getSpanExprForGeoIndexFn, ) opt.ScalarExpr { + var args memo.ScalarListExpr switch t := filterCond.(type) { case *memo.AndExpr: leftExpr := joinGeoIndex(ctx, factory, t.Left, tabID, index, inputCols, getSpanExpr) @@ -320,83 +335,103 @@ func joinGeoIndex( return factory.ConstructOr(leftExpr, rightExpr) case *memo.FunctionExpr: - // Try to extract an inverted join condition from the given function. If - // unsuccessful, try to extract a join condition from an equivalent function - // in which the arguments are commuted. For example: - // - // ST_Intersects(g1, g2) <-> ST_Intersects(g2, g1) - // ST_Covers(g1, g2) <-> ST_CoveredBy(g2, g1) - // - // See joinGeoIndexFromFunction for more details. - fn := joinGeoIndexFromFunction( - factory, t, false /* commuteArgs */, inputCols, tabID, index, - ) - if fn == nil { - fn = joinGeoIndexFromFunction( - factory, t, true /* commuteArgs */, inputCols, tabID, index, - ) + args = t.Args + + case *memo.BBoxCoversExpr, *memo.BBoxIntersectsExpr: + args = memo.ScalarListExpr{ + t.Child(0).(opt.ScalarExpr), t.Child(1).(opt.ScalarExpr), + } + // Cast the arguments to type Geometry if they are type Box2d. + for i := 0; i < len(args); i++ { + if args[i].DataType().Family() == types.Box2DFamily { + args[i] = factory.ConstructCast(args[i], types.Geometry) + } } - return fn default: return nil } + + // Try to extract an inverted join condition from the given filter condition. + // If unsuccessful, try to extract a join condition from an equivalent + // function in which the arguments are commuted. For example: + // + // ST_Intersects(g1, g2) <-> ST_Intersects(g2, g1) + // ST_Covers(g1, g2) <-> ST_CoveredBy(g2, g1) + // g1 && g2 -> ST_Intersects(g2, g1) + // g1 ~ g2 -> ST_CoveredBy(g2, g1) + // + // See joinGeoIndexFromExpr for more details. + fn := joinGeoIndexFromExpr( + factory, filterCond, args, false /* commuteArgs */, inputCols, tabID, index, + ) + if fn == nil { + fn = joinGeoIndexFromExpr( + factory, filterCond, args, true /* commuteArgs */, inputCols, tabID, index, + ) + } + return fn } -// joinGeoIndexFromFunction tries to extract an inverted join condition from the -// given geospatial function. If commuteArgs is true, joinGeoIndexFromFunction -// tries to extract an inverted join condition from an equivalent version of the -// given function in which the first two arguments are swapped. +// joinGeoIndexFromExpr tries to extract an inverted join condition from the +// given expression, which should be either a function or comparison operation. +// If commuteArgs is true, joinGeoIndexFromExpr tries to extract an inverted +// join condition from an equivalent version of the given expression in which +// the first two arguments are swapped. // -// Returns the original function if commuteArgs is false, or a new function -// representing the same relationship but with commuted arguments if -// commuteArgs is true. For example: +// If commuteArgs is false, returns the original function (if the expression +// was a function) or a new function representing the geospatial relationship +// of the comparison operation. If commuteArgs is true, returns a new function +// representing the same relationship but with commuted arguments. For example: // // ST_Intersects(g1, g2) <-> ST_Intersects(g2, g1) // ST_Covers(g1, g2) <-> ST_CoveredBy(g2, g1) +// g1 && g2 -> ST_Intersects(g2, g1) +// g1 ~ g2 -> ST_CoveredBy(g2, g1) // // See geoindex.CommuteRelationshipMap for the full list of mappings. // // Returns nil if a join condition was not successfully extracted. -func joinGeoIndexFromFunction( +func joinGeoIndexFromExpr( factory *norm.Factory, - fn *memo.FunctionExpr, + expr opt.ScalarExpr, + args memo.ScalarListExpr, commuteArgs bool, inputCols opt.ColSet, tabID opt.TableID, index cat.Index, ) opt.ScalarExpr { - if !IsGeoIndexFunction(fn) { + rel, ok := GetGeoIndexRelationship(expr) + if !ok { return nil } // Extract the the inputs to the geospatial function. - if fn.Args.ChildCount() < 2 { + if args.ChildCount() < 2 { panic(errors.AssertionFailedf( "all index-accelerated geospatial functions should have at least two arguments", )) } - arg1, arg2 := fn.Args.Child(0), fn.Args.Child(1) + arg1, arg2 := args.Child(0), args.Child(1) if commuteArgs { arg1, arg2 = arg2, arg1 } // The first argument should either come from the input or be a constant. - variable, ok := arg1.(*memo.VariableExpr) - if ok { - if !inputCols.Contains(variable.Col) { - return nil - } - } else { - if !memo.CanExtractConstDatum(arg1) { + var p props.Shared + memo.BuildSharedProps(arg1, &p) + if !p.OuterCols.Empty() { + if !p.OuterCols.SubsetOf(inputCols) { return nil } + } else if !memo.CanExtractConstDatum(arg1) { + return nil } // The second argument should be a variable corresponding to the index // column. - variable, ok = arg2.(*memo.VariableExpr) + variable, ok := arg2.(*memo.VariableExpr) if !ok { return nil } @@ -406,8 +441,8 @@ func joinGeoIndexFromFunction( } // Any additional params must be constant. - for i := 2; i < fn.Args.ChildCount(); i++ { - if !memo.CanExtractConstDatum(fn.Args.Child(i)) { + for i := 2; i < args.ChildCount(); i++ { + if !memo.CanExtractConstDatum(args.Child(i)) { return nil } } @@ -416,7 +451,6 @@ func joinGeoIndexFromFunction( // Get the geospatial relationship that is equivalent to this one with the // arguments commuted, and construct a new function that represents that // relationship. - rel := geoindex.RelationshipMap[fn.Name] commutedRel, ok := geoindex.CommuteRelationshipMap[rel] if !ok { // It's not possible to commute this relationship. @@ -427,96 +461,131 @@ func joinGeoIndexFromFunction( // Copy the original arguments into a new list, and swap the first two // arguments. - args := make(memo.ScalarListExpr, len(fn.Args)) - copy(args, fn.Args) - args[0], args[1] = args[1], args[0] + commutedArgs := make(memo.ScalarListExpr, len(args)) + copy(commutedArgs, args) + commutedArgs[0], commutedArgs[1] = commutedArgs[1], commutedArgs[0] - props, overload, ok := memo.FindFunction(&args, name) - if !ok { - panic(errors.AssertionFailedf("could not find overload for %s", name)) - } - return factory.ConstructFunction(args, &memo.FunctionPrivate{ - Name: name, - Typ: fn.Typ, - Properties: props, - Overload: overload, - }) + return constructFunction(factory, name, commutedArgs) } - return fn + if _, ok := expr.(*memo.FunctionExpr); !ok { + // This expression was one of the bounding box comparison operators. + // Construct a function that represents the same geospatial relationship. + name := geoindex.RelationshipReverseMap[rel] + return constructFunction(factory, name, args) + } + + return expr +} + +// constructFunction finds a function overload matching the given name and +// argument types, and uses the factory to construct a function. The return +// type of the function must be bool. +func constructFunction( + factory *norm.Factory, name string, args memo.ScalarListExpr, +) opt.ScalarExpr { + props, overload, ok := memo.FindFunction(&args, name) + if !ok { + panic(errors.AssertionFailedf("could not find overload for %s", name)) + } + return factory.ConstructFunction(args, &memo.FunctionPrivate{ + Name: name, + Typ: types.Bool, + Properties: props, + Overload: overload, + }) } // constrainGeoIndex returns an InvertedExpression representing a constraint // of the given geospatial index. func constrainGeoIndex( ctx context.Context, + factory *norm.Factory, expr opt.ScalarExpr, tabID opt.TableID, index cat.Index, getSpanExpr getSpanExprForGeoIndexFn, ) invertedexpr.InvertedExpression { + var args memo.ScalarListExpr switch t := expr.(type) { case *memo.AndExpr: return invertedexpr.And( - constrainGeoIndex(ctx, t.Left, tabID, index, getSpanExpr), - constrainGeoIndex(ctx, t.Right, tabID, index, getSpanExpr), + constrainGeoIndex(ctx, factory, t.Left, tabID, index, getSpanExpr), + constrainGeoIndex(ctx, factory, t.Right, tabID, index, getSpanExpr), ) case *memo.OrExpr: return invertedexpr.Or( - constrainGeoIndex(ctx, t.Left, tabID, index, getSpanExpr), - constrainGeoIndex(ctx, t.Right, tabID, index, getSpanExpr), + constrainGeoIndex(ctx, factory, t.Left, tabID, index, getSpanExpr), + constrainGeoIndex(ctx, factory, t.Right, tabID, index, getSpanExpr), ) case *memo.FunctionExpr: - // Try to constrain the index with the given function. If the resulting - // inverted expression is not a SpanExpression, try constraining the index - // with an equivalent function in which the arguments are commuted. For - // example: - // - // ST_Intersects(g1, g2) <-> ST_Intersects(g2, g1) - // ST_Covers(g1, g2) <-> ST_CoveredBy(g2, g1) - // - // See geoindex.CommuteRelationshipMap for the full list of mappings. - invertedExpr := constrainGeoIndexFromFunction( - ctx, t, false /* commuteArgs */, tabID, index, getSpanExpr, - ) - if _, ok := invertedExpr.(invertedexpr.NonInvertedColExpression); ok { - invertedExpr = constrainGeoIndexFromFunction( - ctx, t, true /* commuteArgs */, tabID, index, getSpanExpr, - ) + args = t.Args + + case *memo.BBoxCoversExpr, *memo.BBoxIntersectsExpr: + args = memo.ScalarListExpr{ + t.Child(0).(opt.ScalarExpr), t.Child(1).(opt.ScalarExpr), + } + // Cast the arguments to type Geometry if they are type Box2d. + for i := 0; i < len(args); i++ { + if args[i].DataType().Family() == types.Box2DFamily { + args[i] = factory.ConstructCast(args[i], types.Geometry) + } } - return invertedExpr default: return invertedexpr.NonInvertedColExpression{} } + + // Try to constrain the index with the given expression. If the resulting + // inverted expression is not a SpanExpression, try constraining the index + // with an equivalent function in which the arguments are commuted. For + // example: + // + // ST_Intersects(g1, g2) <-> ST_Intersects(g2, g1) + // ST_Covers(g1, g2) <-> ST_CoveredBy(g2, g1) + // g1 && g2 -> ST_Intersects(g2, g1) + // g1 ~ g2 -> ST_CoveredBy(g2, g1) + // + // See geoindex.CommuteRelationshipMap for the full list of mappings. + invertedExpr := constrainGeoIndexFromExpr( + ctx, expr, args, false /* commuteArgs */, tabID, index, getSpanExpr, + ) + if _, ok := invertedExpr.(invertedexpr.NonInvertedColExpression); ok { + invertedExpr = constrainGeoIndexFromExpr( + ctx, expr, args, true /* commuteArgs */, tabID, index, getSpanExpr, + ) + } + return invertedExpr } -// constrainGeoIndexFromFunction returns an InvertedExpression representing a -// constraint of the given geospatial index, based on the given function. -// If commuteArgs is true, constrainGeoIndexFromFunction constrains the index -// based on an equivalent version of the given function in which the first two -// arguments are swapped. -func constrainGeoIndexFromFunction( +// constrainGeoIndexFromExpr returns an InvertedExpression representing a +// constraint of the given geospatial index, based on the given expression. +// If commuteArgs is true, constrainGeoIndexFromExpr constrains the index +// based on an equivalent version of the given expression in which the first +// two arguments are swapped. +func constrainGeoIndexFromExpr( ctx context.Context, - fn *memo.FunctionExpr, + expr opt.ScalarExpr, + args memo.ScalarListExpr, commuteArgs bool, tabID opt.TableID, index cat.Index, getSpanExpr getSpanExprForGeoIndexFn, ) invertedexpr.InvertedExpression { - if !IsGeoIndexFunction(fn) { + relationship, ok := GetGeoIndexRelationship(expr) + if !ok { return invertedexpr.NonInvertedColExpression{} } - if fn.Args.ChildCount() < 2 { + if args.ChildCount() < 2 { panic(errors.AssertionFailedf( "all index-accelerated geospatial functions should have at least two arguments", )) } - arg1, arg2 := fn.Args.Child(0), fn.Args.Child(1) + arg1, arg2 := args.Child(0), args.Child(1) if commuteArgs { arg1, arg2 = arg2, arg1 } @@ -540,14 +609,13 @@ func constrainGeoIndexFromFunction( // Any additional params must be constant. var additionalParams []tree.Datum - for i := 2; i < fn.Args.ChildCount(); i++ { - if !memo.CanExtractConstDatum(fn.Args.Child(i)) { + for i := 2; i < args.ChildCount(); i++ { + if !memo.CanExtractConstDatum(args.Child(i)) { return invertedexpr.NonInvertedColExpression{} } - additionalParams = append(additionalParams, memo.ExtractConstDatum(fn.Args.Child(i))) + additionalParams = append(additionalParams, memo.ExtractConstDatum(args.Child(i))) } - relationship := geoindex.RelationshipMap[fn.Name] if commuteArgs { relationship, ok = geoindex.CommuteRelationshipMap[relationship] if !ok { @@ -681,7 +749,7 @@ func NewGeoDatumsToInvertedExpr( // We know that the non-index param is the first param, because the // optimizer already commuted the arguments of any functions where that - // was not the case. See joinGeoIndexFromFunction for details. + // was not the case. See joinGeoIndexFromExpr for details. nonIndexParam := t.Exprs[0].(tree.TypedExpr) var additionalParams []tree.Datum diff --git a/pkg/sql/opt/invertedidx/geo_test.go b/pkg/sql/opt/invertedidx/geo_test.go index 69afc1068c39..c3bb666af335 100644 --- a/pkg/sql/opt/invertedidx/geo_test.go +++ b/pkg/sql/opt/invertedidx/geo_test.go @@ -34,14 +34,14 @@ func TestTryJoinGeoIndex(t *testing.T) { // Create the input table. if _, err := tc.ExecuteDDL( "CREATE TABLE t1 (geom1 GEOMETRY, geog1 GEOGRAPHY, geom11 GEOMETRY, geog11 GEOGRAPHY, " + - "inet1 INET)", + "inet1 INET, bbox1 box2d)", ); err != nil { t.Fatal(err) } // Create the indexed table. if _, err := tc.ExecuteDDL( - "CREATE TABLE t2 (geom2 GEOMETRY, geog2 GEOGRAPHY, inet2 INET, " + + "CREATE TABLE t2 (geom2 GEOMETRY, geog2 GEOGRAPHY, inet2 INET, bbox2 box2d, " + "INVERTED INDEX (geom2), INVERTED INDEX (geog2))", ); err != nil { t.Fatal(err) @@ -200,6 +200,78 @@ func TestTryJoinGeoIndex(t *testing.T) { "st_coveredby('SRID=4326;POINT(-40.23456 70.456772)'::geography, geog2)) AND " + "st_covers('SRID=4326;POINT(-42.89456 75.938299)'::geography, geog2)", }, + + // Bounding box operators. + { + filters: "bbox1 ~ geom2", + indexOrd: geomOrd, + invertedExpr: "st_covers(bbox1::geometry, geom2)", + }, + { + filters: "geom2 ~ bbox1", + indexOrd: geomOrd, + invertedExpr: "st_coveredby(bbox1::geometry, geom2)", + }, + { + filters: "geom1 ~ geom2", + indexOrd: geomOrd, + invertedExpr: "st_covers(geom1, geom2)", + }, + { + filters: "geom2 ~ geom1", + indexOrd: geomOrd, + invertedExpr: "st_coveredby(geom1, geom2)", + }, + { + filters: "bbox1 && geom2", + indexOrd: geomOrd, + invertedExpr: "st_intersects(bbox1::geometry, geom2)", + }, + { + filters: "geom2 && bbox1", + indexOrd: geomOrd, + invertedExpr: "st_intersects(bbox1::geometry, geom2)", + }, + { + filters: "geom1 && geom2", + indexOrd: geomOrd, + invertedExpr: "st_intersects(geom1, geom2)", + }, + { + filters: "geom2 && geom1", + indexOrd: geomOrd, + invertedExpr: "st_intersects(geom1, geom2)", + }, + { + filters: "geom2 && geom1 AND 'BOX(1 2, 3 4)'::box2d ~ geom2", + indexOrd: geomOrd, + invertedExpr: "st_intersects(geom1, geom2) AND " + + "st_covers('BOX(1 2, 3 4)'::box2d::geometry, geom2)", + }, + { + // Wrong index ordinal. + filters: "bbox1 ~ geom2", + indexOrd: geogOrd, + invertedExpr: "", + }, + { + // At least one column from the input is required. + filters: "bbox2 ~ geom2", + indexOrd: geomOrd, + invertedExpr: "", + }, + { + // At least one column from the input is required. + filters: "'BOX(1 2, 3 4)'::box2d ~ geom2", + indexOrd: geomOrd, + invertedExpr: "", + }, + { + // Wrong types. + filters: "geom1::string ~ geom2::string", + indexOrd: geomOrd, + invertedExpr: "", + }, } for _, tc := range testCases { @@ -318,6 +390,54 @@ func TestTryConstrainGeoIndex(t *testing.T) { indexOrd: geogOrd, ok: true, }, + + // Bounding box operators. + { + filters: "'BOX(1 2, 3 4)'::box2d ~ geom", + indexOrd: geomOrd, + ok: true, + }, + { + filters: "geom ~ 'BOX(1 2, 3 4)'::box2d", + indexOrd: geomOrd, + ok: true, + }, + { + filters: "'LINESTRING ( 0 0, 0 2 )'::geometry ~ geom", + indexOrd: geomOrd, + ok: true, + }, + { + filters: "geom ~ 'LINESTRING ( 0 0, 0 2 )'::geometry", + indexOrd: geomOrd, + ok: true, + }, + { + filters: "'BOX(1 2, 3 4)'::box2d && geom", + indexOrd: geomOrd, + ok: true, + }, + { + filters: "geom && 'BOX(1 2, 3 4)'::box2d", + indexOrd: geomOrd, + ok: true, + }, + { + filters: "'LINESTRING ( 0 0, 0 2 )'::geometry && geom", + indexOrd: geomOrd, + ok: true, + }, + { + filters: "geom && 'LINESTRING ( 0 0, 0 2 )'::geometry", + indexOrd: geomOrd, + ok: true, + }, + { + // Wrong index ordinal. + filters: "'BOX(1 2, 3 4)'::box2d ~ geom", + indexOrd: geogOrd, + ok: false, + }, } for _, tc := range testCases { @@ -331,7 +451,7 @@ func TestTryConstrainGeoIndex(t *testing.T) { // that is tested elsewhere. This is just testing that we are constraining // the index when we expect to. _, ok := invertedidx.TryConstrainGeoIndex( - evalCtx.Context, filters, tab, md.Table(tab).Index(tc.indexOrd), + evalCtx.Context, &f, filters, tab, md.Table(tab).Index(tc.indexOrd), ) if tc.ok != ok { t.Fatalf("expected %v, got %v", tc.ok, ok) diff --git a/pkg/sql/opt/operator.go b/pkg/sql/opt/operator.go index 6b6d212c00ae..d68f1bda401d 100644 --- a/pkg/sql/opt/operator.go +++ b/pkg/sql/opt/operator.go @@ -140,6 +140,8 @@ var ComparisonOpReverseMap = map[Operator]tree.ComparisonOperator{ JsonSomeExistsOp: tree.JSONSomeExists, JsonAllExistsOp: tree.JSONAllExists, OverlapsOp: tree.Overlaps, + BBoxCoversOp: tree.RegMatch, + BBoxIntersectsOp: tree.Overlaps, } // BinaryOpReverseMap maps from an optimizer operator type to a semantic tree @@ -259,7 +261,7 @@ func ScalarOperatorTransmitsNulls(op Operator) bool { case BitandOp, BitorOp, BitxorOp, PlusOp, MinusOp, MultOp, DivOp, FloorDivOp, ModOp, PowOp, EqOp, NeOp, LtOp, GtOp, LeOp, GeOp, LikeOp, NotLikeOp, ILikeOp, NotILikeOp, SimilarToOp, NotSimilarToOp, RegMatchOp, NotRegMatchOp, RegIMatchOp, - NotRegIMatchOp, ConstOp: + NotRegIMatchOp, ConstOp, BBoxCoversOp, BBoxIntersectsOp: return true default: @@ -274,7 +276,8 @@ func BoolOperatorRequiresNotNullArgs(op Operator) bool { case EqOp, LtOp, LeOp, GtOp, GeOp, NeOp, LikeOp, NotLikeOp, ILikeOp, NotILikeOp, SimilarToOp, NotSimilarToOp, - RegMatchOp, NotRegMatchOp, RegIMatchOp, NotRegIMatchOp: + RegMatchOp, NotRegMatchOp, RegIMatchOp, NotRegIMatchOp, BBoxCoversOp, + BBoxIntersectsOp: return true } return false diff --git a/pkg/sql/opt/ops/scalar.opt b/pkg/sql/opt/ops/scalar.opt index 4280da79366f..2cb9c8562440 100644 --- a/pkg/sql/opt/ops/scalar.opt +++ b/pkg/sql/opt/ops/scalar.opt @@ -471,6 +471,22 @@ define Overlaps { Right ScalarExpr } +# BBoxCovers is the ~ operator when used with geometry or bounding box +# operands. It maps to tree.RegMatch. +[Scalar, Bool, Comparison] +define BBoxCovers { + Left ScalarExpr + Right ScalarExpr +} + +# BBoxIntersects is the && operator when used with geometry or bounding box +# operands. It maps to tree.Overlaps. +[Scalar, Bool, Comparison] +define BBoxIntersects { + Left ScalarExpr + Right ScalarExpr +} + # AnyScalar is the form of ANY which refers to an ANY operation on a # tuple or array, as opposed to Any which operates on a subquery. [Scalar, Bool] diff --git a/pkg/sql/opt/optbuilder/scalar.go b/pkg/sql/opt/optbuilder/scalar.go index f1483d667787..1e38ee7cab15 100644 --- a/pkg/sql/opt/optbuilder/scalar.go +++ b/pkg/sql/opt/optbuilder/scalar.go @@ -280,7 +280,7 @@ func (b *Builder) buildScalar( } else { left := b.buildScalar(t.TypedLeft(), inScope, nil, nil, colRefs) right := b.buildScalar(t.TypedRight(), inScope, nil, nil, colRefs) - out = b.constructComparison(t.Operator, left, right) + out = b.constructComparison(t, left, right) } case *tree.DTuple: @@ -635,9 +635,9 @@ func (b *Builder) checkSubqueryOuterCols( } func (b *Builder) constructComparison( - cmp tree.ComparisonOperator, left, right opt.ScalarExpr, + cmp *tree.ComparisonExpr, left, right opt.ScalarExpr, ) opt.ScalarExpr { - switch cmp { + switch cmp.Operator { case tree.EQ: return b.factory.ConstructEq(left, right) case tree.LT: @@ -667,6 +667,13 @@ func (b *Builder) constructComparison( case tree.NotSimilarTo: return b.factory.ConstructNotSimilarTo(left, right) case tree.RegMatch: + leftFam, rightFam := cmp.Fn.LeftType.Family(), cmp.Fn.RightType.Family() + if (leftFam == types.GeometryFamily || leftFam == types.Box2DFamily) && + (rightFam == types.GeometryFamily || rightFam == types.Box2DFamily) { + // The ~ operator means "covers" when used with geometry or bounding box + // operands. + return b.factory.ConstructBBoxCovers(left, right) + } return b.factory.ConstructRegMatch(left, right) case tree.NotRegMatch: return b.factory.ConstructNotRegMatch(left, right) @@ -690,9 +697,16 @@ func (b *Builder) constructComparison( case tree.JSONSomeExists: return b.factory.ConstructJsonSomeExists(left, right) case tree.Overlaps: + leftFam, rightFam := cmp.Fn.LeftType.Family(), cmp.Fn.RightType.Family() + if (leftFam == types.GeometryFamily || leftFam == types.Box2DFamily) && + (rightFam == types.GeometryFamily || rightFam == types.Box2DFamily) { + // The && operator means "intersects" when used with geometry or bounding + // box operands. + return b.factory.ConstructBBoxIntersects(left, right) + } return b.factory.ConstructOverlaps(left, right) } - panic(errors.AssertionFailedf("unhandled comparison operator: %s", log.Safe(cmp))) + panic(errors.AssertionFailedf("unhandled comparison operator: %s", log.Safe(cmp.Operator))) } func (b *Builder) constructBinary( diff --git a/pkg/sql/opt/optbuilder/testdata/scalar b/pkg/sql/opt/optbuilder/testdata/scalar index eda5238313e1..06940dea4dfd 100644 --- a/pkg/sql/opt/optbuilder/testdata/scalar +++ b/pkg/sql/opt/optbuilder/testdata/scalar @@ -1303,3 +1303,97 @@ NOT NULL not [type=bool] └── cast: BOOL [type=bool] └── null [type=unknown] + +build-scalar vars=(geometry, geometry) +@1 ~ @2 +---- +b-box-covers [type=bool] + ├── variable: "@1":1 [type=geometry] + └── variable: "@2":2 [type=geometry] + +build-scalar vars=(geometry, box2d) +@1 ~ @2 +---- +b-box-covers [type=bool] + ├── variable: "@1":1 [type=geometry] + └── variable: "@2":2 [type=box2d] + +build-scalar vars=(box2d, geometry) +@1 ~ @2 +---- +b-box-covers [type=bool] + ├── variable: "@1":1 [type=box2d] + └── variable: "@2":2 [type=geometry] + +build-scalar vars=(box2d, box2d) +@1 ~ @2 +---- +b-box-covers [type=bool] + ├── variable: "@1":1 [type=box2d] + └── variable: "@2":2 [type=box2d] + +build-scalar vars=(geometry, geometry) +@1 && @2 +---- +b-box-intersects [type=bool] + ├── variable: "@1":1 [type=geometry] + └── variable: "@2":2 [type=geometry] + +build-scalar vars=(geometry, box2d) +@1 && @2 +---- +b-box-intersects [type=bool] + ├── variable: "@1":1 [type=geometry] + └── variable: "@2":2 [type=box2d] + +build-scalar vars=(box2d, geometry) +@1 && @2 +---- +b-box-intersects [type=bool] + ├── variable: "@1":1 [type=box2d] + └── variable: "@2":2 [type=geometry] + +build-scalar vars=(box2d, box2d) +@1 && @2 +---- +b-box-intersects [type=bool] + ├── variable: "@1":1 [type=box2d] + └── variable: "@2":2 [type=box2d] + +build-scalar vars=(string, string) +@1 ~ @2 +---- +reg-match [type=bool] + ├── variable: "@1":1 [type=string] + └── variable: "@2":2 [type=string] + +build-scalar vars=(inet, inet) +@1 && @2 +---- +overlaps [type=bool] + ├── variable: "@1":1 [type=inet] + └── variable: "@2":2 [type=inet] + +build-scalar vars=(int[], int[]) +@1 && @2 +---- +overlaps [type=bool] + ├── variable: "@1":1 [type=int[]] + └── variable: "@2":2 [type=int[]] + +build-scalar vars=(geometry[], geometry[]) +@1 && @2 +---- +overlaps [type=bool] + ├── variable: "@1":1 [type=geometry[]] + └── variable: "@2":2 [type=geometry[]] + +build-scalar vars=(string, geometry) +@1 ~ @2 +---- +error: unsupported comparison operator: ~ + +build-scalar vars=(geometry[], geometry) +@1 && @2 +---- +error: unsupported comparison operator: && diff --git a/pkg/sql/opt/xform/custom_funcs.go b/pkg/sql/opt/xform/custom_funcs.go index 0168f32d49b7..f0599dd02aef 100644 --- a/pkg/sql/opt/xform/custom_funcs.go +++ b/pkg/sql/opt/xform/custom_funcs.go @@ -1004,7 +1004,7 @@ func (c *CustomFuncs) GenerateInvertedIndexScans( // Check whether the filter can constrain the index. // TODO(rytaft): Unify these two cases so both return a spanExpr. spanExpr, geoOk = invertedidx.TryConstrainGeoIndex( - c.e.evalCtx.Context, filters, scanPrivate.Table, iter.Index(), + c.e.evalCtx.Context, c.e.f, filters, scanPrivate.Table, iter.Index(), ) if geoOk { // Geo index scans can never be tight, so remaining filters is always the diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 36dd804d21f5..1c9c40f2ea4d 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -3457,6 +3457,107 @@ anti-join (cross) └── filters └── st_covers(c.geom:10, n.geom:16) [outer=(10,16), immutable] +# Bounding box operations. +opt expect=GenerateInvertedJoins +SELECT + n.name, c.boroname +FROM nyc_census_blocks AS c +JOIN nyc_neighborhoods@nyc_neighborhoods_geo_idx AS n +ON c.geom::box2d && n.geom +---- +project + ├── columns: name:15 boroname:9 + ├── immutable + └── inner-join (lookup nyc_neighborhoods) + ├── columns: c.boroname:9 c.geom:10 name:15 n.geom:16 + ├── key columns: [13] = [13] + ├── lookup columns are key + ├── immutable + ├── inner-join (inverted-lookup nyc_neighborhoods@nyc_neighborhoods_geo_idx) + │ ├── columns: c.boroname:9 c.geom:10 n.gid:13!null + │ ├── inverted-expr + │ │ └── st_intersects(c.geom:10::BOX2D::GEOMETRY, n.geom:16) + │ ├── scan c + │ │ └── columns: c.boroname:9 c.geom:10 + │ └── filters (true) + └── filters + └── c.geom:10::BOX2D && n.geom:16 [outer=(10,16), immutable] + +opt expect=GenerateInvertedJoins +SELECT + n.name, c.boroname +FROM nyc_census_blocks AS c +JOIN nyc_neighborhoods@nyc_neighborhoods_geo_idx AS n +ON c.geom::box2d ~ n.geom +---- +project + ├── columns: name:15 boroname:9 + ├── immutable + └── inner-join (lookup nyc_neighborhoods) + ├── columns: c.boroname:9 c.geom:10 name:15 n.geom:16 + ├── key columns: [13] = [13] + ├── lookup columns are key + ├── immutable + ├── inner-join (inverted-lookup nyc_neighborhoods@nyc_neighborhoods_geo_idx) + │ ├── columns: c.boroname:9 c.geom:10 n.gid:13!null + │ ├── inverted-expr + │ │ └── st_covers(c.geom:10::BOX2D::GEOMETRY, n.geom:16) + │ ├── scan c + │ │ └── columns: c.boroname:9 c.geom:10 + │ └── filters (true) + └── filters + └── c.geom:10::BOX2D ~ n.geom:16 [outer=(10,16), immutable] + +opt expect=GenerateInvertedJoins +SELECT + n.name, c.boroname +FROM nyc_census_blocks AS c +JOIN nyc_neighborhoods@nyc_neighborhoods_geo_idx AS n +ON n.geom ~ c.geom::box2d +---- +project + ├── columns: name:15 boroname:9 + ├── immutable + └── inner-join (lookup nyc_neighborhoods) + ├── columns: c.boroname:9 c.geom:10 name:15 n.geom:16!null + ├── key columns: [13] = [13] + ├── lookup columns are key + ├── immutable + ├── inner-join (inverted-lookup nyc_neighborhoods@nyc_neighborhoods_geo_idx) + │ ├── columns: c.boroname:9 c.geom:10 n.gid:13!null + │ ├── inverted-expr + │ │ └── st_coveredby(c.geom:10::BOX2D::GEOMETRY, n.geom:16) + │ ├── scan c + │ │ └── columns: c.boroname:9 c.geom:10 + │ └── filters (true) + └── filters + └── n.geom:16 ~ c.geom:10::BOX2D [outer=(10,16), immutable, constraints=(/16: (/NULL - ])] + +opt expect=GenerateInvertedJoins +SELECT + n.name, c.boroname +FROM nyc_census_blocks AS c +JOIN nyc_neighborhoods@nyc_neighborhoods_geo_idx AS n +ON n.geom ~ c.geom +---- +project + ├── columns: name:15 boroname:9 + ├── immutable + └── inner-join (lookup nyc_neighborhoods) + ├── columns: c.boroname:9 c.geom:10!null name:15 n.geom:16!null + ├── key columns: [13] = [13] + ├── lookup columns are key + ├── immutable + ├── inner-join (inverted-lookup nyc_neighborhoods@nyc_neighborhoods_geo_idx) + │ ├── columns: c.boroname:9 c.geom:10 n.gid:13!null + │ ├── inverted-expr + │ │ └── st_coveredby(c.geom:10, n.geom:16) + │ ├── scan c + │ │ └── columns: c.boroname:9 c.geom:10 + │ └── filters (true) + └── filters + └── n.geom:16 ~ c.geom:10 [outer=(10,16), immutable, constraints=(/10: (/NULL - ]; /16: (/NULL - ])] + # -------------------------------------------------- # GenerateZigZagJoins # -------------------------------------------------- diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index b9bad0d186e0..3cf42776fb83 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -2146,6 +2146,120 @@ project ├── st_covers('0101000020E61000009279E40F069E45C0BEE36FD63B1D5240', geog:4) [outer=(4), immutable] └── v:2 = 3 [outer=(2), constraints=(/2: [/3 - /3]; tight), fd=()-->(2)] +# Bounding box operations. +opt +SELECT k FROM g WHERE 'BOX(1 2, 3 4)'::box2d ~ geom +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null geom:3 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(3) + ├── index-join g + │ ├── columns: k:1!null geom:3 + │ ├── key: (1) + │ ├── fd: (1)-->(3) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false + │ │ └── union spans + │ │ ├── ["\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] + │ │ └── ["\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "\xfd\x18") + │ ├── key: (1) + │ └── scan g@geom_idx + │ ├── columns: k:1!null geom_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] + │ │ └── ["\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "\xfd\x18") + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── 'BOX(1 2,3 4)' ~ geom:3 [outer=(3), immutable] + +opt +SELECT k FROM g WHERE geom ~ 'BOX(1 2, 3 4)'::box2d +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null geom:3!null + ├── immutable + ├── key: (1) + ├── fd: (1)-->(3) + ├── index-join g + │ ├── columns: k:1!null geom:3 + │ ├── key: (1) + │ ├── fd: (1)-->(3) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false + │ │ └── union spans + │ │ ├── ["\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] + │ │ └── ["\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] + │ ├── key: (1) + │ └── scan g@geom_idx + │ ├── columns: k:1!null geom_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] + │ │ └── ["\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── geom:3 ~ 'BOX(1 2,3 4)' [outer=(3), immutable, constraints=(/3: (/NULL - ])] + +opt +SELECT k FROM g WHERE geom ~ 'MULTIPOINT((2.2 2.2), (3.0 3.0))'::geometry +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null geom:3!null + ├── immutable + ├── key: (1) + ├── fd: (1)-->(3) + ├── index-join g + │ ├── columns: k:1!null geom:3 + │ ├── key: (1) + │ ├── fd: (1)-->(3) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false + │ │ ├── union spans + │ │ │ ├── ["\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] + │ │ │ └── ["\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] + │ │ └── INTERSECTION + │ │ ├── span expression + │ │ │ ├── tight: false + │ │ │ └── union spans: ["\xfd\x15\x00\x00\x00\x00\x00\x00\x00", "\xfd\x15\x00\x00\x00\x00\x00\x00\x00"] + │ │ └── span expression + │ │ ├── tight: false + │ │ └── union spans: ["\xfd\x11\x00\x00\x00\x00\x00\x00\x00", "\xfd\x11\x00\x00\x00\x00\x00\x00\x00"] + │ ├── key: (1) + │ └── scan g@geom_idx + │ ├── columns: k:1!null geom_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] + │ │ ├── ["\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] + │ │ ├── ["\xfd\x11\x00\x00\x00\x00\x00\x00\x00", "\xfd\x11\x00\x00\x00\x00\x00\x00\x00"] + │ │ └── ["\xfd\x15\x00\x00\x00\x00\x00\x00\x00", "\xfd\x15\x00\x00\x00\x00\x00\x00\x00"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── geom:3 ~ '01040000000200000001010000009A999999999901409A99999999990140010100000000000000000008400000000000000840' [outer=(3), immutable, constraints=(/3: (/NULL - ])] # -------------------------------------------------- # SplitDisjunction