From 01f7eae28c8c4a11a9c4ca0f4dfb5fd2fc6acb8f Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Sun, 30 Jul 2023 17:33:30 +0200 Subject: [PATCH] mango: introduce strict index selection It is not always beneficial for the performance if the Mango query planner tries to assign an index to the selector. User-specified indexes may save the day, but since they are only hints for the planner, automated overrides can still happen. Introduce the concept of "strict index selection" which lets the user to request the exclusive use of a specific index. When it is not possible, give up on planning and return an HTTP 400 response right away. This way the user has the chance to learn about the missing index, request its creation and try again later. The feature comes with a configuration toggle. By default, the feature is disabled to maintain backward compatibility, but the user may ask for this behavior via the new `use_index_strict` query parameter for a specific query. Note that, similarly, `use_index_strict` could be used to disable strict index selection temporarily when it is enabled on the server. Fixes #4511 --- rel/overlay/etc/default.ini | 6 ++ src/docs/src/api/database/find.rst | 58 +++++++++------ src/docs/src/config/query-servers.rst | 17 +++++ src/mango/src/mango_cursor.erl | 87 +++++++++++++++++++++-- src/mango/src/mango_error.erl | 22 ++++++ src/mango/src/mango_opts.erl | 6 ++ src/mango/test/02-basic-find-test.py | 1 + src/mango/test/05-index-selection-test.py | 19 +++++ src/mango/test/mango.py | 3 + 9 files changed, 189 insertions(+), 30 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index bb440e0aab6..c3fc89f991b 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -496,6 +496,12 @@ authentication_db = _users ; the warning. ;index_scan_warning_threshold = 10 +; Set to true to make index selection strict. Indexes always have to be +; specified by the use_index parameter, they are not selected automatically +; for the query. An error is returned if the user-specified index is not +; found. This can be overridden (inverted) ad-hoc by the query parameters. +;strict_index_selection = false + [indexers] couch_mrview = true diff --git a/src/docs/src/api/database/find.rst b/src/docs/src/api/database/find.rst index 4aaa0688cfa..aa1dfe07515 100644 --- a/src/docs/src/api/database/find.rst +++ b/src/docs/src/api/database/find.rst @@ -45,6 +45,10 @@ :"`` or ``["", ""]``. *Optional* + :" - ], - "include_docs": true, - "partition": null, - "reduce": false, "stable": false, - "start_key": [ - 2010 - ], - "update": true, - "view_type": "map" + "execution_stats": false }, "limit": 2, "skip": 0, @@ -1505,14 +1494,32 @@ it easier to take advantage of future improvements to query planning "year", "title" ], - "partitioned": false + "mrargs": { + "include_docs": true, + "view_type": "map", + "reduce": false, + "partition": null, + "start_key": [ + 2010 + ], + "end_key": [ + "" + ], + "direction": "fwd", + "stable": false, + "update": true, + "conflicts": "undefined" + }, + "covered": false } Index selection =============== :ref:`_find ` chooses which index to use for responding -to a query, unless you specify an index at query time. +to a query, unless you specify an index at query time. Note that this +is just a hint for the query planner and as such it might be ignored +if the named index is not suitable for performing the query. The query planner looks at the selector section and finds the index with the closest match to operators and fields used in the query. If there are two @@ -1525,3 +1532,8 @@ the index with the first alphabetical name is chosen. It is good practice to specify indexes explicitly in your queries. This prevents existing queries being affected by new indexes that might get added in a production environment. + +Setting the ``use_index_strict`` parameter to ``true`` disables the +automated query planning. In that case, one must specify an index +which will be checked for usability and an error is return when it is +not specified or it is not usable. diff --git a/src/docs/src/config/query-servers.rst b/src/docs/src/config/query-servers.rst index cf6963fdcd0..ff869e356d9 100644 --- a/src/docs/src/config/query-servers.rst +++ b/src/docs/src/config/query-servers.rst @@ -309,3 +309,20 @@ Mango is the Query Engine that services the :ref:`_find `, endpoin [mango] index_scan_warning_threshold = 10 + + .. config:option:: strict_index_selection :: Strict index selection. + + .. versionadded:: 3.4 + + Make the index selection strict. The query planner will not + try to find a suitable index automatically but use the + user-specified index and give up immediately in case of + failure. This implictly makes the use of the ``use_index`` + query parameter mandatory. Using the ``use_index_strict`` + Boolean parameter, this behavior may be adjusted per query. + + Defaults to ``false``. + :: + + [mango] + strict_index_selection = false diff --git a/src/mango/src/mango_cursor.erl b/src/mango/src/mango_cursor.erl index cabeec22f1a..7e272b18514 100644 --- a/src/mango/src/mango_cursor.erl +++ b/src/mango/src/mango_cursor.erl @@ -47,10 +47,26 @@ create(Db, Selector0, Opts) -> Selector = mango_selector:normalize(Selector0), UsableIndexes = mango_idx:get_usable_indexes(Db, Selector, Opts), - case mango_cursor:maybe_filter_indexes_by_ddoc(UsableIndexes, Opts) of + case maybe_filter_indexes_by_ddoc(UsableIndexes, Opts) of [] -> - % use_index doesn't match a valid index - fall back to a valid one - create_cursor(Db, UsableIndexes, Selector, Opts); + % use_index doesn't match a valid index - determine how + % this shall be handlded by the settings + case (use_index_strict(Opts)) of + true -> + % return an error + Details = + case use_index(Opts) of + [] -> + []; + UseIndex -> + [DesignId | Rest] = UseIndex, + [ddoc_name(DesignId) | Rest] + end, + ?MANGO_ERROR({invalid_index, Details}); + false -> + % fall back to a valid one + create_cursor(Db, UsableIndexes, Selector, Opts) + end; UserSpecifiedIndex -> create_cursor(Db, UserSpecifiedIndex, Selector, Opts) end. @@ -93,13 +109,25 @@ execute(#cursor{index = Idx} = Cursor, UserFun, UserAcc) -> Mod = mango_idx:cursor_mod(Idx), Mod:execute(Cursor, UserFun, UserAcc). +use_index(Opts) -> + {use_index, UseIndex} = lists:keyfind(use_index, 1, Opts), + UseIndex. + +use_index_strict(Opts) -> + case lists:keyfind(use_index_strict, 1, Opts) of + {use_index_strict, ByQuery} -> + ByQuery; + false -> + config:get_boolean("mango", "strict_index_selection", false) + end. + maybe_filter_indexes_by_ddoc(Indexes, Opts) -> - case lists:keyfind(use_index, 1, Opts) of - {use_index, []} -> + case use_index(Opts) of + [] -> []; - {use_index, [DesignId]} -> + [DesignId] -> filter_indexes(Indexes, DesignId); - {use_index, [DesignId, ViewName]} -> + [DesignId, ViewName] -> filter_indexes(Indexes, DesignId, ViewName) end. @@ -263,3 +291,48 @@ ddoc_name(<<"_design/", Name/binary>>) -> Name; ddoc_name(Name) -> Name. + +-ifdef(TEST). +-include_lib("couch/include/couch_eunit.hrl"). + +use_index_strict_test_() -> + { + foreach, + fun() -> + meck:new(config) + end, + fun(_) -> + meck:unload(config) + end, + [ + ?TDEF_FE(t_use_index_strict_disabled_not_requested), + ?TDEF_FE(t_use_index_strict_disabled_requested), + ?TDEF_FE(t_use_index_strict_enabled_not_requested), + ?TDEF_FE(t_use_index_strict_enabled_requested) + ] + }. + +t_use_index_strict_disabled_not_requested(_) -> + meck:expect(config, get_boolean, ["mango", "strict_index_selection", '_'], meck:val(false)), + Options1 = [{use_index_strict, false}], + ?assertEqual(false, use_index_strict(Options1)), + Options2 = [], + ?assertEqual(false, use_index_strict(Options2)). + +t_use_index_strict_disabled_requested(_) -> + meck:expect(config, get_boolean, ["mango", "strict_index_selection", '_'], meck:val(false)), + Options = [{use_index_strict, true}], + ?assertEqual(true, use_index_strict(Options)). + +t_use_index_strict_enabled_not_requested(_) -> + meck:expect(config, get_boolean, ["mango", "strict_index_selection", '_'], meck:val(true)), + Options1 = [{use_index_strict, false}], + ?assertEqual(false, use_index_strict(Options1)), + Options2 = [], + ?assertEqual(true, use_index_strict(Options2)). + +t_use_index_strict_enabled_requested(_) -> + meck:expect(config, get_boolean, ["mango", "strict_index_selection", '_'], meck:val(true)), + Options = [{use_index_strict, true}], + ?assertEqual(true, use_index_strict(Options)). +-endif. diff --git a/src/mango/src/mango_error.erl b/src/mango/src/mango_error.erl index f1c343854f3..989e9fa1245 100644 --- a/src/mango/src/mango_error.erl +++ b/src/mango/src/mango_error.erl @@ -48,6 +48,28 @@ info(mango_json_bookmark, {invalid_bookmark, BadBookmark}) -> <<"invalid_bookmark">>, fmt("Invalid bookmark value: ~s", [?JSON_ENCODE(BadBookmark)]) }; +info(mango_cursor, {invalid_index, []}) -> + { + 400, + <<"invalid_index">>, + <<"You must specify an index with the `use_index` parameter.">> + }; +info(mango_cursor, {invalid_index, [DDocName]}) -> + { + 400, + <<"invalid_index">>, + fmt("_design/~s specified by `use_index` could not be found or it is not suitable.", [ + DDocName + ]) + }; +info(mango_cursor, {invalid_index, [DDocName, ViewName]}) -> + { + 400, + <<"invalid_index">>, + fmt("_design/~s, ~s specified by `use_index` could not be found or it is not suitable.", [ + DDocName, ViewName + ]) + }; info(mango_cursor_text, {invalid_bookmark, BadBookmark}) -> { 400, diff --git a/src/mango/src/mango_opts.erl b/src/mango/src/mango_opts.erl index 96aa0eb42ef..7ac7687d517 100644 --- a/src/mango/src/mango_opts.erl +++ b/src/mango/src/mango_opts.erl @@ -91,6 +91,12 @@ validate_find({Props}) -> {default, []}, {validator, fun validate_use_index/1} ]}, + {<<"use_index_strict">>, [ + {tag, use_index_strict}, + {optional, true}, + {default, false}, + {validator, fun mango_opts:is_boolean/1} + ]}, {<<"bookmark">>, [ {tag, bookmark}, {optional, true}, diff --git a/src/mango/test/02-basic-find-test.py b/src/mango/test/02-basic-find-test.py index 6544df8489f..a3ecf97183a 100644 --- a/src/mango/test/02-basic-find-test.py +++ b/src/mango/test/02-basic-find-test.py @@ -311,6 +311,7 @@ def test_explain_options(self): assert opts["stale"] == False assert opts["update"] == True assert opts["use_index"] == [] + assert opts["use_index_strict"] == False def test_sort_with_all_docs(self): explain = self.db.find( diff --git a/src/mango/test/05-index-selection-test.py b/src/mango/test/05-index-selection-test.py index a4c15608a09..ed10295a6b0 100644 --- a/src/mango/test/05-index-selection-test.py +++ b/src/mango/test/05-index-selection-test.py @@ -211,6 +211,25 @@ def test_explain_sort_reverse(self): ) self.assertEqual(resp_explain["index"]["type"], "json") + def test_strict_index_selection(self): + with self.subTest(with_use_index=True): + try: + self.db.find( + {"manager": True}, use_index="invalid", use_index_strict=True + ) + except Exception as e: + self.assertEqual(e.response.status_code, 400) + else: + raise AssertionError("did not fail on invalid index") + + with self.subTest(with_use_index=False): + try: + self.db.find({"manager": True}, use_index_strict=True) + except Exception as e: + self.assertEqual(e.response.status_code, 400) + else: + raise AssertionError("did not fail due to missing use_index") + class JSONIndexSelectionTests(mango.UserDocsTests, IndexSelectionTests): @classmethod diff --git a/src/mango/test/mango.py b/src/mango/test/mango.py index ad9747a92fb..044a61e753d 100644 --- a/src/mango/test/mango.py +++ b/src/mango/test/mango.py @@ -248,6 +248,7 @@ def find( update=True, executionStats=False, partition=None, + use_index_strict=None, ): body = { "selector": selector, @@ -267,6 +268,8 @@ def find( body["update"] = False if executionStats == True: body["execution_stats"] = True + if use_index_strict is not None: + body["use_index_strict"] = use_index_strict body = json.dumps(body) if partition: ppath = "_partition/{}/".format(partition)