Skip to content

Commit

Permalink
Implementation of batch upsert
Browse files Browse the repository at this point in the history
Batch upsert is mostly used for operation with
one bucket / one Tarantool node in a transaction.
In this case batch upsert is more efficient
than upserting tuple-by-tuple.
Right now CRUD cannot provide batch upsert with full consistency.
CRUD offers batch upsert with partial consistency. That means
that full consistency can be provided only on single replicaset
using `box` transactions.

Part of #193
  • Loading branch information
AnaNek committed Jun 27, 2022
1 parent 5e5bbc0 commit 063c4ba
Show file tree
Hide file tree
Showing 11 changed files with 2,688 additions and 7 deletions.
95 changes: 95 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ It also provides the `crud-storage` and `crud-router` roles for
- [Delete](#delete)
- [Replace](#replace)
- [Upsert](#upsert)
- [Upsert many](#upsert-many)
- [Select](#select)
- [Select conditions](#select-conditions)
- [Pairs](#pairs)
Expand Down Expand Up @@ -524,6 +525,100 @@ crud.upsert_object('customers',
...
```

### Upsert many

```lua
-- Upsert batch of tuples
local result, err = crud.upsert_many(space_name, tuples, operations, opts)
-- Upsert batch of objects
local result, err = crud.upsert_object_many(space_name, objects, operations, opts)
```

where:

* `space_name` (`string`) - name of the space to upsert tuples/objects into
* `tuples` / `objects` (`table`) - array of tuples/objects to upsert
* `operations` (`table`) - update [operations](https://www.tarantool.io/en/doc/latest/reference/reference_lua/box_space/#box-space-update) applied when an existing tuple matches the key fields of the given tuple
* `opts`:
* `timeout` (`?number`) - `vshard.call` timeout (in seconds)
* `fields` (`?table`) - field names for getting only a subset of fields
* `stop_on_error` (`?boolean`) - stop on the first error and report the error
for the failed operation together with errors for the tuples whose operations
were not performed, default is `false`
* `rollback_on_error` (`?boolean`) - any failed operation leads to a
rollback on the storage where the operation failed; errors are reported
for the tuples whose operations were rolled back, default is `false`

Returns metadata with an array of empty arrays (one per successfully upserted tuple), and an array of errors.
Each error object can contain the field `operation_data`.

This field can contain:
* the tuple for which the error occurred;
* an object with an incorrect format;
* a tuple for which the operation was performed but
then rolled back;
* a tuple for which the operation was not performed
because processing was stopped by an error.

Right now CRUD cannot provide batch upsert with full consistency.
CRUD offers batch upsert with partial consistency. That means
that full consistency can be provided only on single replicaset
using `box` transactions.

**Example:**

```lua
crud.upsert_many('customers', {
{1, box.NULL, 'Elizabeth', 23},
{2, box.NULL, 'Anastasia', 22},
},{{'+', 'age', 1}, {'+', 'age', 2}})
---
- metadata:
- {'name': 'id', 'type': 'unsigned'}
- {'name': 'bucket_id', 'type': 'unsigned'}
- {'name': 'name', 'type': 'string'}
- {'name': 'age', 'type': 'number'}
rows:
- []
- []
...
crud.upsert_object_many('customers', {
{id = 3, name = 'Elizabeth', age = 24},
{id = 10, name = 'Anastasia', age = 21},
}, {{'+', 'age', 1}, {'+', 'age', 2}})
---
- metadata:
- {'name': 'id', 'type': 'unsigned'}
- {'name': 'bucket_id', 'type': 'unsigned'}
- {'name': 'name', 'type': 'string'}
- {'name': 'age', 'type': 'number'}
rows:
- []
- []

-- Partial success
local res, errs = crud.upsert_object_many('customers', {
{id = 22, name = 'Alex', age = 34},
{id = 3, name = 'Anastasia', age = 22},
{id = 5, name = 'Sergey', age = 25},
}, {{'+', 'age', 12}, {'=', 'age', 'invalid type'}, {'+', 'age', 10}})
---
res
- metadata:
- {'name': 'id', 'type': 'unsigned'}
- {'name': 'bucket_id', 'type': 'unsigned'}
- {'name': 'name', 'type': 'string'}
- {'name': 'age', 'type': 'number'}
rows:
- []
- []

#errs -- 1
errs[1].class_name -- BatchUpsertError
errs[1].err -- 'Tuple field 4 (age) type does not match one required by operation <...>'
errs[1].tuple -- {3, 2804, 'Anastasia', 22}
...
```

### Select

Expand Down
10 changes: 10 additions & 0 deletions crud.lua
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ local replace = require('crud.replace')
local get = require('crud.get')
local update = require('crud.update')
local upsert = require('crud.upsert')
local upsert_many = require('crud.upsert_many')
local delete = require('crud.delete')
local select = require('crud.select')
local truncate = require('crud.truncate')
Expand Down Expand Up @@ -60,6 +61,14 @@ crud.update = stats.wrap(update.call, stats.op.UPDATE)
-- @function upsert
crud.upsert = stats.wrap(upsert.tuple, stats.op.UPSERT)

-- @refer upsert_many.tuples
-- @function upsert_many
crud.upsert_many = upsert_many.tuples

-- @refer upsert_many.objects
-- @function upsert_object_many
crud.upsert_object_many = upsert_many.objects

-- @refer upsert.object
-- @function upsert
crud.upsert_object = stats.wrap(upsert.object, stats.op.UPSERT)
Expand Down Expand Up @@ -138,6 +147,7 @@ function crud.init_storage()
replace.init()
update.init()
upsert.init()
upsert_many.init()
delete.init()
select.init()
truncate.init()
Expand Down
2 changes: 1 addition & 1 deletion crud/common/map_call_cases/batch_insert_iter.lua
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ function BatchInsertIterator:get()
local replicaset = self.next_index
local func_args = {
self.space_name,
self.next_batch,
self.next_batch.tuples,
self.opts,
}

Expand Down
88 changes: 88 additions & 0 deletions crud/common/map_call_cases/batch_upsert_iter.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
local errors = require('errors')

local dev_checks = require('crud.common.dev_checks')
local sharding = require('crud.common.sharding')

local BaseIterator = require('crud.common.map_call_cases.base_iter')

local SplitTuplesError = errors.new_class('SplitTuplesError')

local BatchUpsertIterator = {}
-- inheritance from BaseIterator
setmetatable(BatchUpsertIterator, {__index = BaseIterator})

--- Build a batch upsert iterator for a map call.
--
-- Splits the given tuples (together with their operations) into
-- per-replicaset batches and prepares the options that are forwarded
-- to the storage call.
--
-- @function new
--
-- @tparam table opts
--  Options of BatchUpsertIterator:new
-- @tparam table opts.tuples
--  Tuples to be upserted
-- @tparam table opts.space
--  Space to be upserted into
-- @tparam table opts.operations
--  Operations to be performed on tuples
-- @tparam table opts.execute_on_storage_opts
--  Additional opts for call on storage; the sharding hash fields
--  are written into this very table (it is not copied)
--
-- @return[1] table iterator
-- @treturn[2] nil
-- @treturn[2] table Error description
function BatchUpsertIterator:new(opts)
    dev_checks('table', {
        tuples = 'table',
        space = 'table',
        operations = 'table',
        execute_on_storage_opts = 'table',
    })

    local split, split_err = sharding.split_tuples_by_replicaset(
        opts.tuples,
        opts.space,
        {operations = opts.operations}
    )
    if split_err ~= nil then
        return nil, SplitTuplesError:new("Failed to split tuples by replicaset: %s", split_err.err)
    end

    local batches = split.batches
    -- Position the cursor on the first (replicaset, batch) pair.
    local first_replicaset, first_batch = next(batches)

    -- The caller's table is extended in place with the sharding info
    -- so that it travels to the storage together with the other opts.
    local storage_opts = opts.execute_on_storage_opts
    storage_opts.sharding_func_hash = split.sharding_func_hash
    storage_opts.sharding_key_hash = split.sharding_key_hash
    storage_opts.skip_sharding_hash_check = split.skip_sharding_hash_check

    self.__index = self

    return setmetatable({
        space_name = opts.space.name,
        opts = storage_opts,
        batches_by_replicasets = batches,
        next_index = first_replicaset,
        next_batch = first_batch,
    }, self)
end

--- Return the call arguments for the current replicaset and advance
-- the iterator to the next one.
--
-- @function get
--
-- @treturn table func_args
--  Array `{space_name, tuples, operations, opts}` for the storage call
-- @treturn table replicaset
--  Replicaset the returned arguments are intended for
function BatchUpsertIterator:get()
    local batch = self.next_batch
    local current_replicaset = self.next_index

    -- Arguments are assembled before the cursor moves on.
    local call_args = {
        self.space_name,
        batch.tuples,
        batch.operations,
        self.opts,
    }

    self.next_index, self.next_batch = next(self.batches_by_replicasets, current_replicaset)

    return call_args, current_replicaset
end

return BatchUpsertIterator
22 changes: 16 additions & 6 deletions crud/common/sharding/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,12 @@ end
-- @return[1] batches
-- Map where key is a replicaset and value is a record with the
-- `tuples` (and, if `opts.operations` is passed, `operations`) of this replicaset
function sharding.split_tuples_by_replicaset(tuples, space)
dev_checks('table', 'table')
function sharding.split_tuples_by_replicaset(tuples, space, opts)
dev_checks('table', 'table', {
operations = '?table',
})

opts = opts or {}

local batches = {}

Expand All @@ -219,7 +223,7 @@ function sharding.split_tuples_by_replicaset(tuples, space)
local skip_sharding_hash_check
local sharding_data
local err
for _, tuple in ipairs(tuples) do
for i, tuple in ipairs(tuples) do
sharding_data, err = sharding.tuple_set_and_return_bucket_id(tuple, space)
if err ~= nil then
return nil, BucketIDError:new("Failed to get bucket ID: %s", err)
Expand All @@ -244,9 +248,15 @@ function sharding.split_tuples_by_replicaset(tuples, space)
sharding_data.bucket_id, err.err)
end

local tuples_by_replicaset = batches[replicaset] or {}
table.insert(tuples_by_replicaset, tuple)
batches[replicaset] = tuples_by_replicaset
local record_by_replicaset = batches[replicaset] or {tuples = {}}
table.insert(record_by_replicaset.tuples, tuple)

if opts.operations ~= nil then
record_by_replicaset.operations = record_by_replicaset.operations or {}
table.insert(record_by_replicaset.operations, opts.operations[i])
end

batches[replicaset] = record_by_replicaset
end

return {
Expand Down
Loading

0 comments on commit 063c4ba

Please sign in to comment.