From 943eaaf7d0f591cec3378eb40507bed0bca103a8 Mon Sep 17 00:00:00 2001 From: Pavel Karpy Date: Sat, 23 Nov 2024 15:57:20 +0300 Subject: [PATCH] object/put: fix concurrent PUT data corruption If the ants pool is busy and cannot take a task, an early `return` without `wg.Wait()` makes `iterateNodesForObject` return, and from that moment all the buffers for binary replication may be reused while they are still in use by the other routines inside the pool. Wait for the WG and keep trying other nodes instead; this can also increase the rate of successful PUTs at high loads. Closes #2978. Signed-off-by: Pavel Karpy --- CHANGELOG.md | 1 + pkg/services/object/put/distributed.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3062f280b3..ba41d937e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ attribute, which is used for container domain name in NNS contracts (#2954) - `meta.DB.Open(readOnly)` moves metabase in RO mode (#3000) - Panic in event listener related to inability to switch RPC node (#2970) - Non-container nodes never check placement policy on PUT, SEARCH requests (#3014) +- Data corruption if PUT is done too concurrently (#2978) ### Changed - `ObjectService`'s `Put` RPC handler caches up to 10K lists of per-object sorted container nodes (#2901) diff --git a/pkg/services/object/put/distributed.go b/pkg/services/object/put/distributed.go index 476bad12e7..2b583d1efb 100644 --- a/pkg/services/object/put/distributed.go +++ b/pkg/services/object/put/distributed.go @@ -331,7 +331,7 @@ func (x placementIterator) iterateNodesForObject(obj oid.ID, f func(nodeDesc) er if e, _ := lastRespErr.Load().(error); e != nil { err = fmt.Errorf("%w (last node error: %w)", err, e) } - return errIncompletePut{singleErr: err} + wg.Wait() } } wg.Wait()