Skip to content

Commit

Permalink
fix: update health check to support modelmesh (#20142) (#20218)
Browse files Browse the repository at this point in the history
Signed-off-by: Trevor Royer <[email protected]>
Co-authored-by: Trevor Royer <[email protected]>
Co-authored-by: Dan Garfield <[email protected]>
  • Loading branch information
3 people authored Oct 3, 2024
1 parent 4149f48 commit 5eb1f9b
Show file tree
Hide file tree
Showing 9 changed files with 244 additions and 44 deletions.
Original file line number Diff line number Diff line change
@@ -1,50 +1,59 @@
-- isInferenceServiceInRawDeploymentMode reports whether the given InferenceService
-- object is deployed in RawDeployment mode.
--
-- The mode is read from the "serving.kserve.io/deploymentMode" annotation on
-- obj.metadata. The function returns true only when that annotation is exactly
-- "RawDeployment"; a missing annotations table or a missing/different value
-- yields false.
--
-- Background: KServe has supported RawDeployment mode for Inference services
-- since [v0.7.0](https://github.com/kserve/kserve/releases/tag/v0.7.0), and for
-- Inference graphs since "v12".
-- NOTE(review): "v12" is copied from the original comment; presumably v0.12 is
-- meant -- confirm against the KServe release notes.
function isInferenceServiceInRawDeploymentMode(obj)
  local annotations = obj.metadata.annotations
  if annotations ~= nil then
    -- Comparing a missing (nil) annotation against the string is simply false,
    -- so no separate nil check on the value is required.
    return annotations["serving.kserve.io/deploymentMode"] == "RawDeployment"
  end
  return false
end

local health_status = {}

health_status.status = "Progressing"
health_status.message = "Waiting for status update."
if obj.status ~= nil and obj.status.conditions ~= nil then
local status_true = 0
health_status.message = "Waiting for InferenceService to report status..."

if obj.status ~= nil then

local progressing = false
local degraded = false
local status_false = 0
local status_unknown = 0
health_status.message = ""
for i, condition in pairs(obj.status.conditions) do
if condition.status == "True" and (condition.type == "IngressReady" or condition.type == "PredictorConfigurationReady" or condition.type == "PredictorReady" or condition.type == "PredictorRouteReady" or condition.type == "Ready") then
status_true = status_true + 1
elseif condition.status == "False" or condition.status == "Unknown" then
msg = condition.type .. " is " .. condition.status
if condition.reason ~= nil and condition.reason ~= "" then
msg = msg .. ", since " .. condition.reason .. "."
end
if condition.message ~= nil and condition.message ~= "" then
msg = msg .. " " .. condition.message
end
health_status.message = health_status.message .. msg .. "\n"
if condition.status == "False" then
status_false = status_false + 1
local msg = ""

if obj.status.modelStatus ~= nil then
if obj.status.modelStatus.transitionStatus ~= "UpToDate" then
if obj.status.modelStatus.transitionStatus == "InProgress" then
progressing = true
else
status_unknown = status_unknown + 1
degraded = true
end
msg = msg .. "0: transitionStatus | " .. obj.status.modelStatus.transitionStatus
end
end
if ((isInferenceServiceInRawDeploymentMode(obj) and status_true == 3) or status_true == 5) and status_false == 0 and status_unknown == 0 then
health_status.message = "Inference Service is healthy."
health_status.status = "Healthy"
return health_status
elseif status_false > 0 then
health_status.status = "Degraded"
return health_status
else
health_status.status = "Progressing"
return health_status

if obj.status.conditions ~= nil then
for i, condition in pairs(obj.status.conditions) do

if condition.status == "Unknown" then
status_unknown = status_unknown + 1
elseif condition.status == "False" then
status_false = status_false + 1
end

if condition.status ~= "True" then
msg = msg .. " | " .. i .. ": " .. condition.type .. " | " .. condition.status
if condition.reason ~= nil and condition.reason ~= "" then
msg = msg .. " | " .. condition.reason
end
if condition.message ~= nil and condition.message ~= "" then
msg = msg .. " | " .. condition.message
end
end

end

if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then
health_status.status = "Healthy"
msg = "InferenceService is healthy."
elseif degraded == false and status_unknown >= 0 then
health_status.status = "Progressing"
else
health_status.status = "Degraded"
end

health_status.message = msg
end
end
return health_status

return health_status
Original file line number Diff line number Diff line change
@@ -1,17 +1,41 @@
tests:
- healthStatus:
status: Progressing
message: "PredictorConfigurationReady is Unknown\nPredictorReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\nPredictorRouteReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\nReady is Unknown, since RevisionMissing. Configuration \"hello-world-predictor-default\" is waiting for a Revision to become ready.\n"
message: ' | 1: PredictorConfigurationReady | Unknown | 2: PredictorReady | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready. | 3: PredictorRouteReady | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready. | 4: Ready | Unknown | RevisionMissing | Configuration "hello-world-predictor-default" is waiting for a Revision to become ready.'
inputPath: testdata/progressing.yaml
- healthStatus:
status: Progressing
message: '0: transitionStatus | InProgress | 1: LatestDeploymentReady | Unknown | PredictorConfigurationReady not ready | 2: PredictorConfigurationReady | Unknown | 3: PredictorReady | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 4: PredictorRouteReady | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 5: Ready | Unknown | RevisionMissing | Configuration "helloworld-predictor" is waiting for a Revision to become ready. | 6: RoutesReady | Unknown | PredictorRouteReady not ready'
inputPath: testdata/progressing_ocp.yaml
- healthStatus:
status: Progressing
message: "0: transitionStatus | InProgress | 1: PredictorReady | False | 2: Ready | False"
inputPath: testdata/progressing_modelmesh.yaml
- healthStatus:
status: Degraded
message: "IngressReady is False, since Predictor ingress not created.\nPredictorConfigurationReady is False, since RevisionFailed. Revision \"helloworld-00002\" failed with message: Container failed with: container exited with no error.\nPredictorReady is False, since RevisionFailed. Revision \"helloworld-00002\" failed with message: Container failed with: container exited with no error.\nReady is False, since Predictor ingress not created.\n"
message: '0: transitionStatus | BlockedByFailedLoad | 1: IngressReady | False | Predictor ingress not created | 2: PredictorConfigurationReady | False | RevisionFailed | Revision "helloworld-00002" failed with message: Container failed with: container exited with no error. | 3: PredictorReady | False | RevisionFailed | Revision "helloworld-00002" failed with message: Container failed with: container exited with no error. | 5: Ready | False | Predictor ingress not created'
inputPath: testdata/degraded.yaml
- healthStatus:
status: Degraded
message: '0: transitionStatus | BlockedByFailedLoad | 1: LatestDeploymentReady | False | PredictorConfigurationReady not ready | 2: PredictorConfigurationReady | False | RevisionFailed | Revision "helloworld-predictor-00002" failed with message: . | 3: PredictorReady | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 4: PredictorRouteReady | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 5: Ready | False | RevisionMissing | Configuration "helloworld-predictor" does not have any ready Revision. | 6: RoutesReady | False | PredictorRouteReady not ready'
inputPath: testdata/degraded_ocp.yaml
- healthStatus:
status: Degraded
message: "0: transitionStatus | BlockedByFailedLoad"
inputPath: testdata/degraded_modelmesh.yaml
- healthStatus:
status: Healthy
message: Inference Service is healthy.
message: InferenceService is healthy.
inputPath: testdata/healthy.yaml
- healthStatus:
status: Healthy
message: Inference Service is healthy.
message: InferenceService is healthy.
inputPath: testdata/healthy_ocp.yaml
- healthStatus:
status: Healthy
message: InferenceService is healthy.
inputPath: testdata/healthy_modelmesh.yaml
- healthStatus:
status: Healthy
message: InferenceService is healthy.
inputPath: testdata/healthy_raw.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ status:
reason: Predictor ingress not created
status: "False"
type: Ready
modelStatus:
transitionStatus: BlockedByFailedLoad
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'True'
type: PredictorReady
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'True'
type: Ready
modelStatus:
transitionStatus: BlockedByFailedLoad
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T23:03:45Z'
reason: PredictorConfigurationReady not ready
severity: Info
status: 'False'
type: LatestDeploymentReady
- lastTransitionTime: '2024-05-30T23:03:45Z'
message: 'Revision "helloworld-predictor-00002" failed with message: .'
reason: RevisionFailed
severity: Info
status: 'False'
type: PredictorConfigurationReady
- lastTransitionTime: '2024-05-30T23:03:45Z'
message: Configuration "helloworld-predictor" does not have any ready Revision.
reason: RevisionMissing
status: 'False'
type: PredictorReady
- lastTransitionTime: '2024-05-30T23:03:45Z'
message: Configuration "helloworld-predictor" does not have any ready Revision.
reason: RevisionMissing
severity: Info
status: 'False'
type: PredictorRouteReady
- lastTransitionTime: '2024-05-30T23:03:45Z'
message: Configuration "helloworld-predictor" does not have any ready Revision.
reason: RevisionMissing
status: 'False'
type: Ready
- lastTransitionTime: '2024-05-30T23:03:45Z'
reason: PredictorRouteReady not ready
severity: Info
status: 'False'
type: RoutesReady
modelStatus:
transitionStatus: BlockedByFailedLoad
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'True'
type: PredictorReady
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'True'
type: Ready
modelStatus:
transitionStatus: UpToDate
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T22:14:31Z'
status: 'True'
type: IngressReady
- lastTransitionTime: '2024-05-30T22:14:30Z'
severity: Info
status: 'True'
type: LatestDeploymentReady
- lastTransitionTime: '2024-05-30T22:14:30Z'
severity: Info
status: 'True'
type: PredictorConfigurationReady
- lastTransitionTime: '2024-05-30T22:14:31Z'
status: 'True'
type: PredictorReady
- lastTransitionTime: '2024-05-30T22:14:31Z'
severity: Info
status: 'True'
type: PredictorRouteReady
- lastTransitionTime: '2024-05-30T22:14:31Z'
status: 'True'
type: Ready
- lastTransitionTime: '2024-05-30T22:14:31Z'
severity: Info
status: 'True'
type: RoutesReady
modelStatus:
transitionStatus: UpToDate
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'False'
type: PredictorReady
- lastTransitionTime: '2024-05-30T22:43:16Z'
status: 'False'
type: Ready
modelStatus:
transitionStatus: InProgress
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: helloworld
namespace: default
spec: {}
status:
conditions:
- lastTransitionTime: '2024-05-30T22:29:46Z'
reason: PredictorConfigurationReady not ready
severity: Info
status: Unknown
type: LatestDeploymentReady
- lastTransitionTime: '2024-05-30T22:29:46Z'
severity: Info
status: Unknown
type: PredictorConfigurationReady
- lastTransitionTime: '2024-05-30T22:29:46Z'
message: Configuration "helloworld-predictor" is waiting for a Revision to become ready.
reason: RevisionMissing
status: Unknown
type: PredictorReady
- lastTransitionTime: '2024-05-30T22:29:46Z'
message: Configuration "helloworld-predictor" is waiting for a Revision to become ready.
reason: RevisionMissing
severity: Info
status: Unknown
type: PredictorRouteReady
- lastTransitionTime: '2024-05-30T22:29:46Z'
message: Configuration "helloworld-predictor" is waiting for a Revision to become ready.
reason: RevisionMissing
status: Unknown
type: Ready
- lastTransitionTime: '2024-05-30T22:29:46Z'
reason: PredictorRouteReady not ready
severity: Info
status: Unknown
type: RoutesReady
modelStatus:
transitionStatus: InProgress

0 comments on commit 5eb1f9b

Please sign in to comment.