Fixes Flaky Tests (opensearch-project#211)

* Initializes the job index before each test

  Signed-off-by: Robert Downs <[email protected]>

* Fixes flakiness in the test that disables and re-enables ISM

  Signed-off-by: Robert Downs <[email protected]>
ochprince · Dec 7, 2021 · 4a07429 · 4a07429
1 parent d2eb794
commit 4a07429
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 10 deletions.
diff --git a/src/test/kotlin/org/opensearch/indexmanagement/IndexManagementRestTestCase.kt b/src/test/kotlin/org/opensearch/indexmanagement/IndexManagementRestTestCase.kt
@@ -14,7 +14,9 @@ import org.opensearch.client.Request
 import org.opensearch.client.RequestOptions
 import org.opensearch.client.Response
 import org.opensearch.client.RestClient
+import org.opensearch.common.Strings
 import org.opensearch.common.settings.Settings
+import org.opensearch.indexmanagement.indexstatemanagement.util.INDEX_HIDDEN
 import org.opensearch.rest.RestStatus
 import java.nio.file.Files
 import java.nio.file.Path
@@ -39,6 +41,20 @@ abstract class IndexManagementRestTestCase : ODFERestTestCase() {
         )
     }
 
+    // On machines with limited resources, tests intermittently fail because they force a job to start before
+    // the job scheduler has registered its index operations listener. Creating the config index ahead of
+    // each test appears to give the scheduler enough time to begin listening for operations.
+    @Before
+    fun initializeManagedIndex() {
+        val configIndex = IndexManagementPlugin.INDEX_MANAGEMENT_INDEX
+        if (indexExists(configIndex)) return
+        val hiddenSettings = Strings.toString(Settings.builder().put(INDEX_HIDDEN, true).build())
+        val body = "{\"settings\": $hiddenSettings,\"mappings\" : ${IndexManagementIndices.indexManagementMappings}}"
+        val createIndex = Request("PUT", "/$configIndex")
+        createIndex.setJsonEntity(body)
+        client().performRequest(createIndex)
+    }
+
     protected val isDebuggingTest = DisableOnDebug(null).isDebugging
     protected val isDebuggingRemoteCluster = System.getProperty("cluster.debug", "false")!!.toBoolean()
     protected val isMultiNode = System.getProperty("cluster.number_of_nodes", "1").toInt() > 1

diff --git a/...n/org/opensearch/indexmanagement/indexstatemanagement/IndexStateManagementRestTestCase.kt b/...n/org/opensearch/indexmanagement/indexstatemanagement/IndexStateManagementRestTestCase.kt
@@ -358,7 +358,7 @@ abstract class IndexStateManagementRestTestCase : IndexManagementRestTestCase()
         }
     }
 
-    protected fun updateManagedIndexConfigStartTime(update: ManagedIndexConfig, desiredStartTimeMillis: Long? = null) {
+    protected fun updateManagedIndexConfigStartTime(update: ManagedIndexConfig, desiredStartTimeMillis: Long? = null, retryOnConflict: Int = 0) {
         // Before updating start time of a job always make sure there are no unassigned shards that could cause the config
         // index to move to a new node and negate this forced start
         if (isMultiNode) {
@@ -379,8 +379,9 @@ abstract class IndexStateManagementRestTestCase : IndexManagementRestTestCase()
         val millis = Duration.of(intervalSchedule.interval.toLong(), intervalSchedule.unit).minusSeconds(2).toMillis()
         val startTimeMillis = desiredStartTimeMillis ?: Instant.now().toEpochMilli() - millis
         val waitForActiveShards = if (isMultiNode) "all" else "1"
+        val endpoint = "$INDEX_MANAGEMENT_INDEX/_update/${update.id}?wait_for_active_shards=$waitForActiveShards;retry_on_conflict=$retryOnConflict"
         val response = client().makeRequest(
-            "POST", "$INDEX_MANAGEMENT_INDEX/_update/${update.id}?wait_for_active_shards=$waitForActiveShards",
+            "POST", endpoint,
             StringEntity(
                 "{\"doc\":{\"managed_index\":{\"schedule\":{\"interval\":{\"start_time\":" +
                     "\"$startTimeMillis\"}}}}}",

diff --git a/.../opensearch/indexmanagement/indexstatemanagement/coordinator/ManagedIndexCoordinatorIT.kt b/.../opensearch/indexmanagement/indexstatemanagement/coordinator/ManagedIndexCoordinatorIT.kt
@@ -202,7 +202,6 @@ class ManagedIndexCoordinatorIT : IndexStateManagementRestTestCase() {
         // Confirm job was disabled
         val disabledManagedIndexConfig: ManagedIndexConfig = waitFor {
             val config = getManagedIndexConfigByDocId(managedIndexConfig.id)
-            assertNotNull("Could not find ManagedIndexConfig", config)
             assertEquals("ManagedIndexConfig was not disabled", false, config!!.enabled)
             config
         }
@@ -230,16 +229,12 @@ class ManagedIndexCoordinatorIT : IndexStateManagementRestTestCase() {
 
         // Confirm job was re-enabled
         val enabledManagedIndexConfig: ManagedIndexConfig = waitFor {
-            val config = getExistingManagedIndexConfig(indexName)
-            assertEquals("ManagedIndexConfig was not re-enabled", true, config.enabled)
+            val config = getManagedIndexConfigByDocId(disabledManagedIndexConfig.id)
+            assertEquals("ManagedIndexConfig was not re-enabled", true, config!!.enabled)
             config
         }
 
-        // TODO seen version conflict flaky failure here
-        logger.info("Config we use on update: $enabledManagedIndexConfig")
-        logger.info("Latest config: ${getExistingManagedIndexConfig(indexName)}")
-        // seems the config from above waitFor, after that, config got updated again?
-        updateManagedIndexConfigStartTime(enabledManagedIndexConfig)
+        updateManagedIndexConfigStartTime(enabledManagedIndexConfig, retryOnConflict = 4)
 
         waitFor {
             assertEquals(