From 1c122520693293c67ce0a31ac98ff596298edb74 Mon Sep 17 00:00:00 2001 From: Raghuvansh Raj Date: Fri, 10 Nov 2023 07:00:31 +0530 Subject: [PATCH] Bugfix for update staying stuck when sent as part of bulk with retry_on_conflict specified Signed-off-by: Raghuvansh Raj --- CHANGELOG.md | 3 +- .../bulk/BulkPrimaryExecutionContext.java | 1 + .../bulk/TransportShardBulkActionTests.java | 63 +++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ff0d4e3fd285..0f46727ff8c4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [AdmissionControl] Added changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting ([#9286](https://github.com/opensearch-project/OpenSearch/pull/9286)) - GHA to verify checklist items completion in PR descriptions ([#10800](https://github.com/opensearch-project/OpenSearch/pull/10800)) - [Remote cluster state] Restore cluster state version during remote state auto restore ([#10853](https://github.com/opensearch-project/OpenSearch/pull/10853)) +- Bugfix for update staying stuck when sent as part of bulk with `retry_on_conflict` specified ([#11152](https://github.com/opensearch-project/OpenSearch/issues/11152)) ### Dependencies - Bump `log4j-core` from 2.18.0 to 2.19.0 @@ -147,4 +148,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Security [Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x \ No newline at end of file +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x diff --git a/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java b/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java index 896456089ee3e..4e770f5851bc6 100644 --- a/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java +++ b/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java @@ -232,6 +232,7 @@ public void resetForExecutionForRetry() { currentItemState = ItemProcessingState.INITIAL; requestToExecute = null; executionResult = null; + retryCounter++; assertInvariants(ItemProcessingState.INITIAL); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java index b325cfa197933..99c311c11e33f 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java @@ -101,6 +101,7 @@ import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import static org.hamcrest.CoreMatchers.equalTo; @@ -947,6 +948,68 @@ public void testRetries() throws Exception { latch.await(); } + public void testUpdateWithRetryOnConflict() throws IOException, InterruptedException { + IndexSettings indexSettings = new IndexSettings(indexMetadata(), Settings.EMPTY); + UpdateRequest writeRequest = new UpdateRequest("index", "id").doc(Requests.INDEX_CONTENT_TYPE, "field", "value"); + writeRequest.retryOnConflict(3); + BulkItemRequest primaryRequest = new BulkItemRequest(0, writeRequest); + + IndexRequest updateResponse = new IndexRequest("index").id("id").source(Requests.INDEX_CONTENT_TYPE, "field", "value"); + + Exception err = new VersionConflictEngineException(shardId, "id", "I'm conflicted <(;_;)>"); + Engine.IndexResult conflictedResult = new Engine.IndexResult(err, 0); + + IndexShard shard = mock(IndexShard.class); + when(shard.applyIndexOperationOnPrimary(anyLong(), any(), any(), anyLong(), anyLong(), anyLong(), anyBoolean())).thenAnswer( + ir -> conflictedResult + ); + when(shard.indexSettings()).thenReturn(indexSettings); + when(shard.shardId()).thenReturn(shardId); + when(shard.mapperService()).thenReturn(mock(MapperService.class)); + + UpdateHelper updateHelper = mock(UpdateHelper.class); + when(updateHelper.prepare(any(), eq(shard), any())).thenReturn( + new UpdateHelper.Result( + updateResponse, + randomBoolean() ? DocWriteResponse.Result.CREATED : DocWriteResponse.Result.UPDATED, + Collections.singletonMap("field", "value"), + Requests.INDEX_CONTENT_TYPE + ) + ); + + BulkItemRequest[] items = new BulkItemRequest[] { primaryRequest }; + BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); + + final CountDownLatch latch = new CountDownLatch(1); + Runnable runnable = () -> TransportShardBulkAction.performOnPrimary( + bulkShardRequest, + shard, + updateHelper, + threadPool::absoluteTimeInMillis, + new NoopMappingUpdatePerformer(), + listener -> listener.onResponse(null), + new LatchedActionListener<>( + ActionTestUtils.assertNoFailureListener( + result -> assertEquals( + VersionConflictEngineException.class, + result.replicaRequest().items()[0].getPrimaryResponse().getFailure().getCause().getClass() + ) + ), + latch + ), + threadPool, + Names.WRITE + ); + + // execute the runnable on a separate thread so that the infinite loop can be detected + Thread thread = new Thread(runnable); + thread.start(); + + // timeout the request in 10 seconds if there is an infinite loop + assertTrue(latch.await(10, TimeUnit.SECONDS)); + assertEquals(items[0].getPrimaryResponse().getFailure().getCause().getClass(), VersionConflictEngineException.class); + } + public void testForceExecutionOnRejectionAfterMappingUpdate() throws Exception { TestThreadPool rejectingThreadPool = new TestThreadPool( "TransportShardBulkActionTests#testForceExecutionOnRejectionAfterMappingUpdate",