Skip to content

Commit

Permalink
scan: fix filter erroneous early exit
Browse files Browse the repository at this point in the history
The issue described below is related to the read operations which
allows to scan: `crud.select`, `crud.pairs`, `crud.count`,
`readview:select` and `readview:pairs`.

The erroneous behavior reported by [1] and #418 is as follows:
- result changes when reordering operation conditions;
- when `>=` condition operation is changed to `=`, there are more rows
  in the result.

The reason is as follows. Scanning read operates with two entities:
an iterator and a filter. The iterator includes an index, a starting
value and iterator type (EQ, GT, etc.). The iterator is built from
conditions, if possible, otherwise primary index is used. Remaining
conditions form the filter, so the actual result satisfies all operation
conditions.

The filter supports early exit. Let's consider the following example.
For `crud.select(space, {{'>=', 'id', 1}, {'<=', 'id', 10}})`, where
`id` is an index (or an indexed field), the iterator uses index
`id`, starts from key = `1` and goes by GE rules, covering [1, +inf)
ordered keys. On the other hand, when iterator reaches the tuple
with `id` = `11`, all tuples after this one will never satisfy
the second condition, because our iterator yields tuples sorted by `id`
(due to underlying index). So filter tells than there is no reason
to scan anymore, and we finish the scanning procedure.

Before this patch, the function behind early exit decision had worked
as follows: "if the condition is an index, we go in forward (reverse)
order and `<=` or `<` (`>=` or `>`) condition is violated, there is no
reason to scan anymore". But the valid approach is "if the condition is
SCANNING index...". Before this patch, filter had assumed that if the
condition for index is specified, tuples are ordered, but it works only
if iterator uses the same index as in the condition. This patch fixes
the issue.

The erroneous behavior may happen in the following case:
- there are multiple conditions,
- there are at least two different index operands,
- non-scanning index condition uses `<=`, `<`, `>=` or `>` operation.

1. https://jira.vk.team/browse/TNT-941

Closes #418
  • Loading branch information
DifferentialOrange committed Feb 5, 2024
1 parent 511fb02 commit 52e3deb
Show file tree
Hide file tree
Showing 15 changed files with 304 additions and 26 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Fixed
* Compatibility with vshard configuration if UUIDs are omitted (#407).
* Compatibility with automatic master discovery in vshard (#409).
* Secondary conditions for index operands with operations `>=`, `<=`, `>`, `<`
no longer cause missing part of the actual result for scan operations
(`crud.select`, `crud.pairs`, `crud.count`, `readview:select`,
`readview:pairs`) (#418).

## [1.4.2] - 25-12-23

Expand Down
24 changes: 12 additions & 12 deletions crud/compare/filters.lua
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,8 @@ local filters = {}
- opposite condition `'id' < 100` becomes false
- in such case we can exit from iteration
]]
local function is_early_exit_possible(index, tarantool_iter, condition)
if index == nil then
return false
end

if index.name ~= condition.operand then
local function is_early_exit_possible(scan_index, tarantool_iter, condition)
if scan_index.name ~= condition.operand then
return false
end

Expand Down Expand Up @@ -77,8 +73,8 @@ local function get_values_opts(index)
return index_field_opts
end

local function parse(space, conditions, opts)
dev_checks('table', '?table', {
local function parse(space, scan_index, conditions, opts)
dev_checks('table', 'table', '?table', {
scan_condition_num = '?number',
tarantool_iter = 'number',
})
Expand Down Expand Up @@ -137,7 +133,11 @@ local function parse(space, conditions, opts)
operator = condition.operator,
values = condition.values,
types = fields_types,
early_exit_is_possible = is_early_exit_possible(index, opts.tarantool_iter, condition),
early_exit_is_possible = is_early_exit_possible(
scan_index,
opts.tarantool_iter,
condition
),
values_opts = values_opts,
})
end
Expand Down Expand Up @@ -645,13 +645,13 @@ local function compile(filter_code)
return func
end

function filters.gen_func(space, conditions, opts)
dev_checks('table', '?table', {
function filters.gen_func(space, scan_index, conditions, opts)
dev_checks('table', 'table', '?table', {
tarantool_iter = 'number',
scan_condition_num = '?number',
})

local filter_conditions, err = parse(space, conditions, {
local filter_conditions, err = parse(space, scan_index, conditions, {
scan_condition_num = opts.scan_condition_num,
tarantool_iter = opts.tarantool_iter,
})
Expand Down
2 changes: 1 addition & 1 deletion crud/count.lua
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ local function count_on_storage(space_name, index_id, conditions, opts)

local value = opts.scan_value

local filter_func, err = filters.gen_func(space, conditions, {
local filter_func, err = filters.gen_func(space, index, conditions, {
tarantool_iter = opts.tarantool_iter,
scan_condition_num = opts.scan_condition_num,
})
Expand Down
2 changes: 1 addition & 1 deletion crud/readview.lua
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ local function select_readview_on_storage(space_name, index_id, conditions, opts
return nil, err
end

local filter_func, err = select_filters.gen_func(space, conditions, {
local filter_func, err = select_filters.gen_func(space, index, conditions, {
tarantool_iter = opts.tarantool_iter,
scan_condition_num = opts.scan_condition_num,
})
Expand Down
2 changes: 1 addition & 1 deletion crud/select.lua
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ local function select_on_storage(space_name, index_id, conditions, opts)
return nil, err
end

local filter_func, err = select_filters.gen_func(space, conditions, {
local filter_func, err = select_filters.gen_func(space, index, conditions, {
tarantool_iter = opts.tarantool_iter,
scan_condition_num = opts.scan_condition_num,
})
Expand Down
35 changes: 35 additions & 0 deletions test/entrypoint/srv_select/storage_init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -192,4 +192,39 @@ return function()
if_not_exists = true,
})
end

local logins_space = box.schema.space.create('logins', {
format = {
{name = 'id', type = 'unsigned'},
{name = 'bucket_id', type = 'unsigned'},
{name = 'city', type = 'string'},
{name = 'name', type = 'string'},
{name = 'last_login', type = 'number'},
},
if_not_exists = true,
engine = engine,
})

logins_space:create_index('id_index', {
parts = { 'id' },
if_not_exists = true,
})

logins_space:create_index('bucket_id', {
parts = { 'bucket_id' },
unique = false,
if_not_exists = true,
})

logins_space:create_index('city', {
parts = { 'city' },
unique = false,
if_not_exists = true,
})

logins_space:create_index('last_login', {
parts = { 'last_login' },
unique = false,
if_not_exists = true,
})
end
14 changes: 14 additions & 0 deletions test/helper.lua
Original file line number Diff line number Diff line change
Expand Up @@ -944,4 +944,18 @@ function helpers.assert_str_contains_pattern_with_replicaset_id(str, pattern)
t.assert(found, ("string %q does not contain pattern %q"):format(str, pattern))
end

function helpers.prepare_ordered_data(g, space, expected_objects, bucket_id, order_condition)
helpers.insert_objects(g, space, expected_objects)

local resp, err = g.cluster.main_server:call('crud.select', {
space,
{order_condition},
{bucket_id = bucket_id, mode = 'write'},
})
t.assert_equals(err, nil)

local objects = crud.unflatten_rows(resp.rows, resp.metadata)
t.assert_equals(objects, expected_objects)
end

return helpers
26 changes: 26 additions & 0 deletions test/integration/pairs_readview_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ local t = require('luatest')
local crud_utils = require('crud.common.utils')

local helpers = require('test.helper')
local read_scenario = require('test.integration.read_scenario')

local pgroup = t.group('pairs_readview', helpers.backend_matrix({
{engine = 'memtx'},
Expand Down Expand Up @@ -880,3 +881,28 @@ pgroup.test_pairs_no_map_reduce = function(g)
local diff_2 = map_reduces_after_2 - map_reduces_after_1
t.assert_equals(diff_2, 0, 'Select request was not a map reduce')
end

pgroup.test_gh_418_pairs_with_secondary_noneq_index_condition = function(g)
local read = function(cg, space, conditions, opts)
opts = table.deepcopy(opts) or {}
opts.use_tomap = true

return cg.cluster.main_server:exec(function(space, conditions, opts)
local crud = require('crud')

local rv, err = crud.readview()
t.assert_equals(err, nil)

local status, resp = pcall(function()
return rv:pairs(space, conditions, opts):totable()
end)
t.assert(status, resp)

rv:close()

return resp, nil
end, {space, conditions, opts})
end

read_scenario.gh_418_read_with_secondary_noneq_index_condition(g, read)
end
22 changes: 22 additions & 0 deletions test/integration/pairs_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ local t = require('luatest')
local crud_utils = require('crud.common.utils')

local helpers = require('test.helper')
local read_scenario = require('test.integration.read_scenario')

local pgroup = t.group('pairs', helpers.backend_matrix({
{engine = 'memtx'},
Expand Down Expand Up @@ -891,3 +892,24 @@ pgroup.test_pairs_no_map_reduce = function(g)
local diff_2 = map_reduces_after_2 - map_reduces_after_1
t.assert_equals(diff_2, 0, 'Select request was not a map reduce')
end

pgroup.test_gh_418_pairs_with_secondary_noneq_index_condition = function(g)
local read = function(cg, space, conditions, opts)
opts = table.deepcopy(opts) or {}
opts.mode = 'write'
opts.use_tomap = true

return cg.cluster.main_server:exec(function(space, conditions, opts)
local crud = require('crud')

local status, resp = pcall(function()
return crud.pairs(space, conditions, opts):totable()
end)
t.assert(status, resp)

return resp, nil
end, {space, conditions, opts})
end

read_scenario.gh_418_read_with_secondary_noneq_index_condition(g, read)
end
62 changes: 62 additions & 0 deletions test/integration/read_scenario.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
-- crud.select/crud.pairs/readview:select/readview:pairs
-- have a lot of common scenarios, which are mostly tested with
-- four nearly identical copypasted test functions now.
-- This approach is expected to improve it at least for new test cases.
-- Scenarios here are for `srv_select` entrypoint.

local t = require('luatest')

local helpers = require('test.helper')

local function gh_418_read_with_secondary_noneq_index_condition(cg, read)
-- Pin bucket_id to reproduce issue on a single storage.
local PINNED_BUCKET_NO = 1

local expected_objects = {
{
id = 1,
bucket_id = PINNED_BUCKET_NO,
city = 'Tatsumi Port Island',
name = 'Yukari',
last_login = 42,
},
{
id = 2,
bucket_id = PINNED_BUCKET_NO,
city = 'Tatsumi Port Island',
name = 'Junpei',
last_login = 52,
},
{
id = 3,
bucket_id = PINNED_BUCKET_NO,
city = 'Tatsumi Port Island',
name = 'Mitsuru',
last_login = 42,
},
}

helpers.prepare_ordered_data(cg,
'logins',
expected_objects,
PINNED_BUCKET_NO,
{'=', 'city', 'Tatsumi Port Island'}
)

-- Issue https://github.com/tarantool/crud/issues/418 is as follows:
-- storage iterator exits early on the second tuple because
-- iterator had erroneously expected tuples to be sorted by `last_login`
-- index while iterating on `city` index. Before the issue had beed fixed,
-- user had received only one record instead of two.
local objects = read(cg,
'logins',
{{'=', 'city', 'Tatsumi Port Island'}, {'<=', 'last_login', 42}},
{bucket_id = PINNED_BUCKET_NO}
)

t.assert_equals(objects, {expected_objects[1], expected_objects[3]})
end

return {
gh_418_read_with_secondary_noneq_index_condition = gh_418_read_with_secondary_noneq_index_condition,
}
20 changes: 20 additions & 0 deletions test/integration/select_readview_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ local crud_utils = require('crud.common.utils')


local helpers = require('test.helper')
local read_scenario = require('test.integration.read_scenario')

local pgroup = t.group('select_readview', helpers.backend_matrix({
{engine = 'memtx'},
Expand Down Expand Up @@ -2484,3 +2485,22 @@ pgroup.test_select_closed_readview = function(g)

t.assert_str_contains(err.str, 'Read view is closed')
end

pgroup.test_gh_418_select_with_secondary_noneq_index_condition = function(g)
local read = function(cg, space, conditions, opts)
return cg.cluster.main_server:exec(function(space, conditions, opts)
local crud = require('crud')
local rv, err = crud.readview()
t.assert_equals(err, nil)

local resp, err = rv:select(space, conditions, opts)
t.assert_equals(err, nil)

rv:close()

return crud.unflatten_rows(resp.rows, resp.metadata)
end, {space, conditions, opts})
end

read_scenario.gh_418_read_with_secondary_noneq_index_condition(g, read)
end
16 changes: 16 additions & 0 deletions test/integration/select_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ local crud = require('crud')
local crud_utils = require('crud.common.utils')

local helpers = require('test.helper')
local read_scenario = require('test.integration.read_scenario')

local pgroup = t.group('select', helpers.backend_matrix({
{engine = 'memtx'},
Expand Down Expand Up @@ -32,6 +33,7 @@ pgroup.before_each(function(g)
helpers.truncate_space_on_cluster(g.cluster, 'customers')
helpers.truncate_space_on_cluster(g.cluster, 'developers')
helpers.truncate_space_on_cluster(g.cluster, 'cars')
helpers.truncate_space_on_cluster(g.cluster, 'logins')
end)


Expand Down Expand Up @@ -2265,3 +2267,17 @@ pgroup.test_pairs_yield_every_0 = function(g)
})
end)
end

pgroup.test_gh_418_select_with_secondary_noneq_index_condition = function(g)
local read = function(cg, space, conditions, opts)
opts = table.deepcopy(opts) or {}
opts.mode = 'write'

local resp, err = cg.cluster.main_server:call('crud.select', {space, conditions, opts})
t.assert_equals(err, nil)

return crud.unflatten_rows(resp.rows, resp.metadata)
end

read_scenario.gh_418_read_with_secondary_noneq_index_condition(g, read)
end
Loading

0 comments on commit 52e3deb

Please sign in to comment.