From 52e3deb7c793d18f190f5c252edce5bd29ee564e Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Fri, 2 Feb 2024 16:49:36 +0300 Subject: [PATCH] scan: fix filter erroneous early exit The issue described below is related to the read operations which allows to scan: `crud.select`, `crud.pairs`, `crud.count`, `readview:select` and `readview:pairs`. The erroneous behavior reported by [1] and #418 is as follows: - result changes when reordering operation conditions; - when `>=` condition operation is changed to `=`, there are more rows in the result. The reason is as follows. Scanning read operates with two entities: an iterator and a filter. The iterator includes an index, a starting value and iterator type (EQ, GT, etc.). The iterator is built from conditions, if possible, otherwise primary index is used. Remaining conditions form the filter, so the actual result satisfies all operation conditions. The filter supports early exit. Let's consider the following example. For `crud.select(space, {{'>=', 'id', 1}, {'<=', 'id', 10}})`, where `id` is an index (or an indexed field), the iterator uses index `id`, starts from key = `1` and goes by GE rules, covering [1, +inf) ordered keys. On the other hand, when iterator reaches the tuple with `id` = `11`, all tuples after this one will never satisfy the second condition, because our iterator yields tuples sorted by `id` (due to underlying index). So filter tells than there is no reason to scan anymore, and we finish the scanning procedure. Before this patch, the function behind early exit decision had worked as follows: "if the condition is an index, we go in forward (reverse) order and `<=` or `<` (`>=` or `>`) condition is violated, there is no reason to scan anymore". But the valid approach is "if the condition is SCANNING index...". Before this patch, filter had assumed that if the condition for index is specified, tuples are ordered, but it works only if iterator uses the same index as in the condition. This patch fixes the issue. The erroneous behavior may happen in the following case: - there are multiple conditions, - there are at least two different index operands, - non-scanning index condition uses `<=`, `<`, `>=` or `>` operation. 1. https://jira.vk.team/browse/TNT-941 Closes #418 --- CHANGELOG.md | 4 + crud/compare/filters.lua | 24 +++--- crud/count.lua | 2 +- crud/readview.lua | 2 +- crud/select.lua | 2 +- test/entrypoint/srv_select/storage_init.lua | 35 +++++++++ test/helper.lua | 14 ++++ test/integration/pairs_readview_test.lua | 26 +++++++ test/integration/pairs_test.lua | 22 ++++++ test/integration/read_scenario.lua | 62 ++++++++++++++++ test/integration/select_readview_test.lua | 20 +++++ test/integration/select_test.lua | 16 ++++ test/unit/select_executor_test.lua | 16 ++-- test/unit/select_filters_test.lua | 82 ++++++++++++++++++++- test/unit/select_filters_uuid_test.lua | 3 +- 15 files changed, 304 insertions(+), 26 deletions(-) create mode 100644 test/integration/read_scenario.lua diff --git a/CHANGELOG.md b/CHANGELOG.md index d6e4e382..6ec273ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Fixed * Compatibility with vshard configuration if UUIDs are omitted (#407). * Compatibility with automatic master discovery in vshard (#409). +* Secondary conditions for index operands with operations `>=`, `<=`, `>`, `<` + no longer cause missing part of the actual result for scan operations + (`crud.select`, `crud.pairs`, `crud.count`, `readview:select`, + `readview:pairs`) (#418). ## [1.4.2] - 25-12-23 diff --git a/crud/compare/filters.lua b/crud/compare/filters.lua index 145e0190..c2e39868 100644 --- a/crud/compare/filters.lua +++ b/crud/compare/filters.lua @@ -17,12 +17,8 @@ local filters = {} - opposite condition `'id' < 100` becomes false - in such case we can exit from iteration ]] -local function is_early_exit_possible(index, tarantool_iter, condition) - if index == nil then - return false - end - - if index.name ~= condition.operand then +local function is_early_exit_possible(scan_index, tarantool_iter, condition) + if scan_index.name ~= condition.operand then return false end @@ -77,8 +73,8 @@ local function get_values_opts(index) return index_field_opts end -local function parse(space, conditions, opts) - dev_checks('table', '?table', { +local function parse(space, scan_index, conditions, opts) + dev_checks('table', 'table', '?table', { scan_condition_num = '?number', tarantool_iter = 'number', }) @@ -137,7 +133,11 @@ local function parse(space, conditions, opts) operator = condition.operator, values = condition.values, types = fields_types, - early_exit_is_possible = is_early_exit_possible(index, opts.tarantool_iter, condition), + early_exit_is_possible = is_early_exit_possible( + scan_index, + opts.tarantool_iter, + condition + ), values_opts = values_opts, }) end @@ -645,13 +645,13 @@ local function compile(filter_code) return func end -function filters.gen_func(space, conditions, opts) - dev_checks('table', '?table', { +function filters.gen_func(space, scan_index, conditions, opts) + dev_checks('table', 'table', '?table', { tarantool_iter = 'number', scan_condition_num = '?number', }) - local filter_conditions, err = parse(space, conditions, { + local filter_conditions, err = parse(space, scan_index, conditions, { scan_condition_num = opts.scan_condition_num, tarantool_iter = opts.tarantool_iter, }) diff --git a/crud/count.lua b/crud/count.lua index 470bcce5..7e0cc5d2 100644 --- a/crud/count.lua +++ b/crud/count.lua @@ -52,7 +52,7 @@ local function count_on_storage(space_name, index_id, conditions, opts) local value = opts.scan_value - local filter_func, err = filters.gen_func(space, conditions, { + local filter_func, err = filters.gen_func(space, index, conditions, { tarantool_iter = opts.tarantool_iter, scan_condition_num = opts.scan_condition_num, }) diff --git a/crud/readview.lua b/crud/readview.lua index d5ff6dd5..fcd0a832 100644 --- a/crud/readview.lua +++ b/crud/readview.lua @@ -160,7 +160,7 @@ local function select_readview_on_storage(space_name, index_id, conditions, opts return nil, err end - local filter_func, err = select_filters.gen_func(space, conditions, { + local filter_func, err = select_filters.gen_func(space, index, conditions, { tarantool_iter = opts.tarantool_iter, scan_condition_num = opts.scan_condition_num, }) diff --git a/crud/select.lua b/crud/select.lua index a095b589..dc53062a 100644 --- a/crud/select.lua +++ b/crud/select.lua @@ -78,7 +78,7 @@ local function select_on_storage(space_name, index_id, conditions, opts) return nil, err end - local filter_func, err = select_filters.gen_func(space, conditions, { + local filter_func, err = select_filters.gen_func(space, index, conditions, { tarantool_iter = opts.tarantool_iter, scan_condition_num = opts.scan_condition_num, }) diff --git a/test/entrypoint/srv_select/storage_init.lua b/test/entrypoint/srv_select/storage_init.lua index 5bc49aa4..ce49d82e 100644 --- a/test/entrypoint/srv_select/storage_init.lua +++ b/test/entrypoint/srv_select/storage_init.lua @@ -192,4 +192,39 @@ return function() if_not_exists = true, }) end + + local logins_space = box.schema.space.create('logins', { + format = { + {name = 'id', type = 'unsigned'}, + {name = 'bucket_id', type = 'unsigned'}, + {name = 'city', type = 'string'}, + {name = 'name', type = 'string'}, + {name = 'last_login', type = 'number'}, + }, + if_not_exists = true, + engine = engine, + }) + + logins_space:create_index('id_index', { + parts = { 'id' }, + if_not_exists = true, + }) + + logins_space:create_index('bucket_id', { + parts = { 'bucket_id' }, + unique = false, + if_not_exists = true, + }) + + logins_space:create_index('city', { + parts = { 'city' }, + unique = false, + if_not_exists = true, + }) + + logins_space:create_index('last_login', { + parts = { 'last_login' }, + unique = false, + if_not_exists = true, + }) end diff --git a/test/helper.lua b/test/helper.lua index 6fbfaad5..30c8cde3 100644 --- a/test/helper.lua +++ b/test/helper.lua @@ -944,4 +944,18 @@ function helpers.assert_str_contains_pattern_with_replicaset_id(str, pattern) t.assert(found, ("string %q does not contain pattern %q"):format(str, pattern)) end +function helpers.prepare_ordered_data(g, space, expected_objects, bucket_id, order_condition) + helpers.insert_objects(g, space, expected_objects) + + local resp, err = g.cluster.main_server:call('crud.select', { + space, + {order_condition}, + {bucket_id = bucket_id, mode = 'write'}, + }) + t.assert_equals(err, nil) + + local objects = crud.unflatten_rows(resp.rows, resp.metadata) + t.assert_equals(objects, expected_objects) +end + return helpers diff --git a/test/integration/pairs_readview_test.lua b/test/integration/pairs_readview_test.lua index f3bb7085..29ab2c4a 100644 --- a/test/integration/pairs_readview_test.lua +++ b/test/integration/pairs_readview_test.lua @@ -3,6 +3,7 @@ local t = require('luatest') local crud_utils = require('crud.common.utils') local helpers = require('test.helper') +local read_scenario = require('test.integration.read_scenario') local pgroup = t.group('pairs_readview', helpers.backend_matrix({ {engine = 'memtx'}, @@ -880,3 +881,28 @@ pgroup.test_pairs_no_map_reduce = function(g) local diff_2 = map_reduces_after_2 - map_reduces_after_1 t.assert_equals(diff_2, 0, 'Select request was not a map reduce') end + +pgroup.test_gh_418_pairs_with_secondary_noneq_index_condition = function(g) + local read = function(cg, space, conditions, opts) + opts = table.deepcopy(opts) or {} + opts.use_tomap = true + + return cg.cluster.main_server:exec(function(space, conditions, opts) + local crud = require('crud') + + local rv, err = crud.readview() + t.assert_equals(err, nil) + + local status, resp = pcall(function() + return rv:pairs(space, conditions, opts):totable() + end) + t.assert(status, resp) + + rv:close() + + return resp, nil + end, {space, conditions, opts}) + end + + read_scenario.gh_418_read_with_secondary_noneq_index_condition(g, read) +end diff --git a/test/integration/pairs_test.lua b/test/integration/pairs_test.lua index e291457b..60d1b527 100644 --- a/test/integration/pairs_test.lua +++ b/test/integration/pairs_test.lua @@ -3,6 +3,7 @@ local t = require('luatest') local crud_utils = require('crud.common.utils') local helpers = require('test.helper') +local read_scenario = require('test.integration.read_scenario') local pgroup = t.group('pairs', helpers.backend_matrix({ {engine = 'memtx'}, @@ -891,3 +892,24 @@ pgroup.test_pairs_no_map_reduce = function(g) local diff_2 = map_reduces_after_2 - map_reduces_after_1 t.assert_equals(diff_2, 0, 'Select request was not a map reduce') end + +pgroup.test_gh_418_pairs_with_secondary_noneq_index_condition = function(g) + local read = function(cg, space, conditions, opts) + opts = table.deepcopy(opts) or {} + opts.mode = 'write' + opts.use_tomap = true + + return cg.cluster.main_server:exec(function(space, conditions, opts) + local crud = require('crud') + + local status, resp = pcall(function() + return crud.pairs(space, conditions, opts):totable() + end) + t.assert(status, resp) + + return resp, nil + end, {space, conditions, opts}) + end + + read_scenario.gh_418_read_with_secondary_noneq_index_condition(g, read) +end diff --git a/test/integration/read_scenario.lua b/test/integration/read_scenario.lua new file mode 100644 index 00000000..7adc2cfd --- /dev/null +++ b/test/integration/read_scenario.lua @@ -0,0 +1,62 @@ +-- crud.select/crud.pairs/readview:select/readview:pairs +-- have a lot of common scenarios, which are mostly tested with +-- four nearly identical copypasted test functions now. +-- This approach is expected to improve it at least for new test cases. +-- Scenarios here are for `srv_select` entrypoint. + +local t = require('luatest') + +local helpers = require('test.helper') + +local function gh_418_read_with_secondary_noneq_index_condition(cg, read) + -- Pin bucket_id to reproduce issue on a single storage. + local PINNED_BUCKET_NO = 1 + + local expected_objects = { + { + id = 1, + bucket_id = PINNED_BUCKET_NO, + city = 'Tatsumi Port Island', + name = 'Yukari', + last_login = 42, + }, + { + id = 2, + bucket_id = PINNED_BUCKET_NO, + city = 'Tatsumi Port Island', + name = 'Junpei', + last_login = 52, + }, + { + id = 3, + bucket_id = PINNED_BUCKET_NO, + city = 'Tatsumi Port Island', + name = 'Mitsuru', + last_login = 42, + }, + } + + helpers.prepare_ordered_data(cg, + 'logins', + expected_objects, + PINNED_BUCKET_NO, + {'=', 'city', 'Tatsumi Port Island'} + ) + + -- Issue https://github.com/tarantool/crud/issues/418 is as follows: + -- storage iterator exits early on the second tuple because + -- iterator had erroneously expected tuples to be sorted by `last_login` + -- index while iterating on `city` index. Before the issue had beed fixed, + -- user had received only one record instead of two. + local objects = read(cg, + 'logins', + {{'=', 'city', 'Tatsumi Port Island'}, {'<=', 'last_login', 42}}, + {bucket_id = PINNED_BUCKET_NO} + ) + + t.assert_equals(objects, {expected_objects[1], expected_objects[3]}) +end + +return { + gh_418_read_with_secondary_noneq_index_condition = gh_418_read_with_secondary_noneq_index_condition, +} diff --git a/test/integration/select_readview_test.lua b/test/integration/select_readview_test.lua index 6a003d9a..796fcffe 100644 --- a/test/integration/select_readview_test.lua +++ b/test/integration/select_readview_test.lua @@ -7,6 +7,7 @@ local crud_utils = require('crud.common.utils') local helpers = require('test.helper') +local read_scenario = require('test.integration.read_scenario') local pgroup = t.group('select_readview', helpers.backend_matrix({ {engine = 'memtx'}, @@ -2484,3 +2485,22 @@ pgroup.test_select_closed_readview = function(g) t.assert_str_contains(err.str, 'Read view is closed') end + +pgroup.test_gh_418_select_with_secondary_noneq_index_condition = function(g) + local read = function(cg, space, conditions, opts) + return cg.cluster.main_server:exec(function(space, conditions, opts) + local crud = require('crud') + local rv, err = crud.readview() + t.assert_equals(err, nil) + + local resp, err = rv:select(space, conditions, opts) + t.assert_equals(err, nil) + + rv:close() + + return crud.unflatten_rows(resp.rows, resp.metadata) + end, {space, conditions, opts}) + end + + read_scenario.gh_418_read_with_secondary_noneq_index_condition(g, read) +end diff --git a/test/integration/select_test.lua b/test/integration/select_test.lua index 1a16f28d..dff15ee8 100644 --- a/test/integration/select_test.lua +++ b/test/integration/select_test.lua @@ -4,6 +4,7 @@ local crud = require('crud') local crud_utils = require('crud.common.utils') local helpers = require('test.helper') +local read_scenario = require('test.integration.read_scenario') local pgroup = t.group('select', helpers.backend_matrix({ {engine = 'memtx'}, @@ -32,6 +33,7 @@ pgroup.before_each(function(g) helpers.truncate_space_on_cluster(g.cluster, 'customers') helpers.truncate_space_on_cluster(g.cluster, 'developers') helpers.truncate_space_on_cluster(g.cluster, 'cars') + helpers.truncate_space_on_cluster(g.cluster, 'logins') end) @@ -2265,3 +2267,17 @@ pgroup.test_pairs_yield_every_0 = function(g) }) end) end + +pgroup.test_gh_418_select_with_secondary_noneq_index_condition = function(g) + local read = function(cg, space, conditions, opts) + opts = table.deepcopy(opts) or {} + opts.mode = 'write' + + local resp, err = cg.cluster.main_server:call('crud.select', {space, conditions, opts}) + t.assert_equals(err, nil) + + return crud.unflatten_rows(resp.rows, resp.metadata) + end + + read_scenario.gh_418_read_with_secondary_noneq_index_condition(g, read) +end diff --git a/test/unit/select_executor_test.lua b/test/unit/select_executor_test.lua index 2623b744..a61f7e9d 100644 --- a/test/unit/select_executor_test.lua +++ b/test/unit/select_executor_test.lua @@ -89,7 +89,7 @@ g.test_one_condition_no_index = function() t.assert_equals(err, nil) local index = space.index[plan.index_id] - local filter_func, err = select_filters.gen_func(space, conditions, { + local filter_func, err = select_filters.gen_func(space, index, conditions, { tarantool_iter = plan.tarantool_iter, scan_condition_num = plan.scan_condition_num, }) @@ -149,7 +149,7 @@ g.test_one_condition_with_index = function() t.assert_equals(err, nil) local index = space.index[plan.index_id] - local filter_func, err = select_filters.gen_func(space, conditions, { + local filter_func, err = select_filters.gen_func(space, index, conditions, { tarantool_iter = plan.tarantool_iter, scan_condition_num = plan.scan_condition_num, }) @@ -205,7 +205,7 @@ g.test_multiple_conditions = function() t.assert_equals(err, nil) local index = space.index[plan.index_id] - local filter_func, err = select_filters.gen_func(space, conditions, { + local filter_func, err = select_filters.gen_func(space, index, conditions, { tarantool_iter = plan.tarantool_iter, scan_condition_num = plan.scan_condition_num, }) @@ -256,7 +256,7 @@ g.test_composite_index = function() t.assert_equals(err, nil) local index = space.index[plan.index_id] - local filter_func, err = select_filters.gen_func(space, conditions, { + local filter_func, err = select_filters.gen_func(space, index, conditions, { tarantool_iter = plan.tarantool_iter, scan_condition_num = plan.scan_condition_num, }) @@ -304,7 +304,7 @@ g.test_get_by_id = function() t.assert_equals(err, nil) local index = space.index[plan.index_id] - local filter_func, err = select_filters.gen_func(space, conditions, { + local filter_func, err = select_filters.gen_func(space, index, conditions, { tarantool_iter = plan.tarantool_iter, scan_condition_num = plan.scan_condition_num, }) @@ -345,7 +345,7 @@ g.test_early_exit = function() t.assert_equals(err, nil) local index = space.index[plan.index_id] - local filter_func, err = select_filters.gen_func(space, conditions, { + local filter_func, err = select_filters.gen_func(space, index, conditions, { tarantool_iter = plan.tarantool_iter, scan_condition_num = plan.scan_condition_num, }) @@ -382,7 +382,7 @@ g.test_select_all = function() t.assert_equals(err, nil) local index = space.index[plan.index_id] - local filter_func, err = select_filters.gen_func(space, nil, { + local filter_func, err = select_filters.gen_func(space, index, nil, { tarantool_iter = plan.tarantool_iter, scan_condition_num = plan.scan_condition_num, }) @@ -419,7 +419,7 @@ g.test_limit = function() t.assert_equals(err, nil) local index = space.index[plan.index_id] - local filter_func, err = select_filters.gen_func(space, nil, { + local filter_func, err = select_filters.gen_func(space, index, nil, { tarantool_iter = plan.tarantool_iter, scan_condition_num = plan.scan_condition_num, }) diff --git a/test/unit/select_filters_test.lua b/test/unit/select_filters_test.lua index a8272c8e..6dead2a0 100644 --- a/test/unit/select_filters_test.lua +++ b/test/unit/select_filters_test.lua @@ -112,8 +112,9 @@ g.test_parse = function() t.assert_equals(err, nil) local space = box.space.customers + local scan_index = space.index[plan.index_id] - local filter_conditions, err = select_filters.internal.parse(space, conditions, { + local filter_conditions, err = select_filters.internal.parse(space, scan_index, conditions, { scan_condition_num = plan.scan_condition_num, tarantool_iter = plan.tarantool_iter, }) @@ -166,6 +167,80 @@ g.test_parse = function() t.assert_equals(has_a_car_opts.collation, nil) end +local gh_418_cases = { + scan_condition = { + eq = { + conditions = cond_funcs.eq('age', 20), + }, + le = { + conditions = cond_funcs.le('age', 20), + }, + lt = { + conditions = cond_funcs.lt('age', 20), + }, + ge = { + conditions = cond_funcs.ge('age', 20), + }, + gt = { + conditions = cond_funcs.gt('age', 20), + }, + req = { + conditions = cond_funcs.eq('age', 20), + opts = {first = -1}, + }, + }, + secondary_condition = { + eq = { + conditions = cond_funcs.eq('full_name', {'Ivan', 'Ivanov'}), + }, + le = { + conditions = cond_funcs.le('full_name', {'Ivan', 'Ivanov'}), + }, + lt = { + conditions = cond_funcs.lt('full_name', {'Ivan', 'Ivanov'}), + }, + ge = { + conditions = cond_funcs.ge('full_name', {'Ivan', 'Ivanov'}), + }, + gt = { + conditions = cond_funcs.gt('full_name', {'Ivan', 'Ivanov'}), + }, + } +} + +for scan_name, scan_case in pairs(gh_418_cases.scan_condition) do + for sec_name, sec_case in pairs(gh_418_cases.secondary_condition) do + local test_name = ('test_gh_418_scan_%s_secondary_%s_no_early_exit'):format(scan_name, sec_name) + + g[test_name] = function() + local conditions = { + scan_case.conditions, + sec_case.conditions, + } + + local plan, err = select_plan.new(box.space.customers, conditions, scan_case.opts) + t.assert_equals(err, nil) + + local space = box.space.customers + local scan_index = space.index[plan.index_id] + + local filter_conditions, err = select_filters.internal.parse( + space, + scan_index, + conditions, + { + scan_condition_num = plan.scan_condition_num, + tarantool_iter = plan.tarantool_iter, + } + ) + t.assert_equals(err, nil) + + local full_name_filter_condition = filter_conditions[1] + t.assert_equals(full_name_filter_condition.early_exit_is_possible, false) + end + end +end + g.test_one_condition_number = function() local filter_conditions = { { @@ -868,7 +943,10 @@ g.test_jsonpath_indexes = function() local plan, err = select_plan.new(box.space.cars, conditions) t.assert_equals(err, nil) - local filter_conditions, err = select_filters.internal.parse(box.space.cars, conditions, { + local space = box.space.cars + local scan_index = space.index[plan.index_id] + + local filter_conditions, err = select_filters.internal.parse(space, scan_index, conditions, { scan_condition_num = plan.scan_condition_num, tarantool_iter = plan.tarantool_iter, }) diff --git a/test/unit/select_filters_uuid_test.lua b/test/unit/select_filters_uuid_test.lua index ee6b1f2e..f273a4b6 100644 --- a/test/unit/select_filters_uuid_test.lua +++ b/test/unit/select_filters_uuid_test.lua @@ -63,8 +63,9 @@ g.test_parse = function() t.assert_equals(err, nil) local space = box.space.customers + local scan_index = space.index[plan.index_id] - local filter_conditions, err = select_filters.internal.parse(space, conditions, { + local filter_conditions, err = select_filters.internal.parse(space, scan_index, conditions, { scan_condition_num = plan.scan_condition_num, tarantool_iter = plan.tarantool_iter, })