Skip to content

Commit

Permalink
Refactor cluster state listener transport calls
Browse files: browse the repository at this point in the history
Signed-off-by: Ryan Bogan <[email protected]>
  • Loading branch information
ryanbogan committed Apr 18, 2024
1 parent 3508c79 commit 231452e
Showing 1 changed file with 15 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,7 @@ protected void updateModelsNewCluster() throws IOException, InterruptedException
if (modelDao.isCreated()) {
List<String> modelIds = searchModelIds();
for (String modelId : modelIds) {
Model model = modelDao.get(modelId);
ModelMetadata modelMetadata = model.getModelMetadata();
ModelMetadata modelMetadata = getModelMetadata(modelId);
if (modelMetadata.getState().equals(ModelState.TRAINING)) {
updateModelStateAsFailed(modelId, modelMetadata, "Training failed to complete as cluster crashed");
}
Expand All @@ -123,7 +122,7 @@ protected void updateModelsNodesRemoved(List<DiscoveryNode> removedNodes) throws
List<String> modelIds = searchModelIds();
for (DiscoveryNode removedNode : removedNodes) {
for (String modelId : modelIds) {
ModelMetadata modelMetadata = modelDao.getMetadata(modelId);
ModelMetadata modelMetadata = getModelMetadata(modelId);
if (modelMetadata.getNodeAssignment().equals(removedNode.getEphemeralId())
&& modelMetadata.getState().equals(ModelState.TRAINING)) {
updateModelStateAsFailed(modelId, modelMetadata, "Training failed to complete as node dropped");
Expand Down Expand Up @@ -174,4 +173,17 @@ public void onFailure(Exception e) {
}
});
}

/**
 * Looks up the metadata for a model, preferring the copy returned by
 * {@code modelDao.getMetadata} and falling back to the full model record
 * returned by {@code modelDao.get} when that copy is absent.
 *
 * @param modelId identifier of the model whose metadata is requested
 * @return the metadata from {@code modelDao.getMetadata} when it is non-null;
 *         otherwise the metadata carried by the model fetched via {@code modelDao.get}
 * @throws ExecutionException   declared by this method; presumably propagated from the
 *                              fallback {@code modelDao.get} call (confirm against ModelDao)
 * @throws InterruptedException declared by this method; presumably propagated from the
 *                              fallback {@code modelDao.get} call (confirm against ModelDao)
 */
private ModelMetadata getModelMetadata(String modelId) throws ExecutionException, InterruptedException {
    final ModelMetadata clusterMetadata = modelDao.getMetadata(modelId);
    if (clusterMetadata != null) {
        // Common path: the metadata lookup already has an entry for this model.
        return clusterMetadata;
    }

    // On versions prior to 2.14, only models in created state are present in model metadata,
    // so a null result here means the model record itself has to be consulted.
    log.info(
        "Model metadata is null in cluster metadata. This can happen for models training on nodes prior to OpenSearch version 2.14.0. Fetching model information from system index."
    );
    return modelDao.get(modelId).getModelMetadata();
}
}

0 comments on commit 231452e

Please sign in to comment.