Skip to content

Commit

Permalink
fix: Handle unload too quick after load (#5504)
Browse files Browse the repository at this point in the history
* fix note

* add ability to check loading state

* change func name to be more reflective
  • Loading branch information
sakoush authored Apr 8, 2024
1 parent 3ce7029 commit 0b9ee7d
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
10 changes: 7 additions & 3 deletions scheduler/pkg/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func (s *SimpleScheduler) scheduleToServer(modelName string) error {
}

if model.Deleted {
// we need to LoadedModels anyway:
// we need to call UpdateLoadedModels anyway:
// - in the case where we are deleting a model that doesn't have a server (FailedSchedule), server is ""
// - otherwise proceed as normal
server := ""
Expand Down Expand Up @@ -202,8 +202,12 @@ func (s *SimpleScheduler) scheduleToServer(modelName string) error {
if !ok {
msg := "Failed to schedule model as no matching server had enough suitable replicas"
logger.Debug(msg)
// we do not want to reset the server if it has live replicas
s.store.FailedScheduling(latestModel, msg, !latestModel.HasLiveReplicas())
// we do not want to reset the server if it has live replicas or loading replicas.
// In the case of loading replicas, we need to make sure that we can unload them later:
// for example, if a model has just been marked as loading on a particular server replica
// and then gets a delete request (before it is marked as loaded or available), we need to make sure
// that we can unload it from the server.
s.store.FailedScheduling(latestModel, msg, !latestModel.HasLiveReplicas() && !latestModel.IsLoadingOrLoadedOnServer())
return errors.New(msg)
}

Expand Down
11 changes: 11 additions & 0 deletions scheduler/pkg/store/mesh.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,17 @@ func (m *ModelVersion) IsLoadingOrLoaded(server string, replicaIdx int) bool {
return false
}

// IsLoadingOrLoadedOnServer reports whether at least one replica of this
// model version has a State for which AlreadyLoadingOrLoaded returns true.
// It returns false when there are no replicas or none of them match.
//
// The replicas are inspected while holding m.mu's read lock.
func (m *ModelVersion) IsLoadingOrLoadedOnServer() bool {
	m.mu.RLock()
	defer m.mu.RUnlock()

	found := false
	for _, replica := range m.replicas {
		// Stop at the first matching replica; the rest cannot change the answer.
		if found = replica.State.AlreadyLoadingOrLoaded(); found {
			break
		}
	}
	return found
}

func (m *ModelVersion) HasLiveReplicas() bool {
m.mu.RLock()
defer m.mu.RUnlock()
Expand Down

0 comments on commit 0b9ee7d

Please sign in to comment.