Skip to content

Commit

Permalink
Fix testSearchAndRelocateConcurrently (elastic#116806)
Browse files Browse the repository at this point in the history
This aims to test we can search through replica shard relocations.
However, the way the test was written it was sometimes also starting
another data node. The concurrent search requests would sometimes
hit this new node, before its cluster state was RECOVERED.

The search action throws exception when the cluster state is not
recovered as it needs to be able to read the cluster state.

This fixes the test to grab a coy of the bootstrapped nodes and use them when calling the _search API
before the cluster (potentially) resizes.
  • Loading branch information
andreidan authored and alexey-ivanov-es committed Nov 28, 2024
1 parent fbf75d3 commit a0fb86b
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 29 deletions.
3 changes: 0 additions & 3 deletions muted-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,6 @@ tests:
- class: org.elasticsearch.search.basic.SearchWithRandomDisconnectsIT
method: testSearchWithRandomDisconnects
issue: https://github.com/elastic/elasticsearch/issues/116175
- class: org.elasticsearch.search.basic.SearchWhileRelocatingIT
method: testSearchAndRelocateConcurrentlyRandomReplicas
issue: https://github.com/elastic/elasticsearch/issues/116145
- class: org.elasticsearch.xpack.deprecation.DeprecationHttpIT
method: testDeprecatedSettingsReturnWarnings
issue: https://github.com/elastic/elasticsearch/issues/108628
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ private void testSearchAndRelocateConcurrently(final int numberOfReplicas) throw
}
indexRandom(true, indexBuilders.toArray(new IndexRequestBuilder[indexBuilders.size()]));
assertHitCount(prepareSearch(), (numDocs));
// hold a copy of the node names before a new node is potentially added later
String[] nodeNamesBeforeClusterResize = internalCluster().getNodeNames();
final int numIters = scaledRandomIntBetween(5, 20);
for (int i = 0; i < numIters; i++) {
final AtomicBoolean stop = new AtomicBoolean(false);
Expand All @@ -76,34 +78,37 @@ private void testSearchAndRelocateConcurrently(final int numberOfReplicas) throw
public void run() {
try {
while (stop.get() == false) {
assertResponse(prepareSearch().setSize(numDocs), response -> {
if (response.getHits().getTotalHits().value() != numDocs) {
// if we did not search all shards but had no serious failures that is potentially fine
// if only the hit-count is wrong. this can happen if the cluster-state is behind when the
// request comes in. It's a small window but a known limitation.
if (response.getTotalShards() != response.getSuccessfulShards()
&& Stream.of(response.getShardFailures())
.allMatch(ssf -> ssf.getCause() instanceof NoShardAvailableActionException)) {
nonCriticalExceptions.add(
"Count is "
+ response.getHits().getTotalHits().value()
+ " but "
+ numDocs
+ " was expected. "
+ formatShardStatus(response)
);
} else {
assertHitCount(response, numDocs);
assertResponse(
client(randomFrom(nodeNamesBeforeClusterResize)).prepareSearch().setSize(numDocs),
response -> {
if (response.getHits().getTotalHits().value() != numDocs) {
// if we did not search all shards but had no serious failures that is potentially fine
// if only the hit-count is wrong. this can happen if the cluster-state is behind when the
// request comes in. It's a small window but a known limitation.
if (response.getTotalShards() != response.getSuccessfulShards()
&& Stream.of(response.getShardFailures())
.allMatch(ssf -> ssf.getCause() instanceof NoShardAvailableActionException)) {
nonCriticalExceptions.add(
"Count is "
+ response.getHits().getTotalHits().value()
+ " but "
+ numDocs
+ " was expected. "
+ formatShardStatus(response)
);
} else {
assertHitCount(response, numDocs);
}
}
}

final SearchHits sh = response.getHits();
assertThat(
"Expected hits to be the same size the actual hits array",
sh.getTotalHits().value(),
equalTo((long) (sh.getHits().length))
);
});
final SearchHits sh = response.getHits();
assertThat(
"Expected hits to be the same size the actual hits array",
sh.getTotalHits().value(),
equalTo((long) (sh.getHits().length))
);
}
);
// this is the more critical but that we hit the actual hit array has a different size than the
// actual number of hits.
}
Expand Down

0 comments on commit a0fb86b

Please sign in to comment.