diff --git a/pkg/controller.v1/tensorflow/controller.go b/pkg/controller.v1/tensorflow/controller.go index 2bc1ecdf05..4cd9b6e6a7 100644 --- a/pkg/controller.v1/tensorflow/controller.go +++ b/pkg/controller.v1/tensorflow/controller.go @@ -17,7 +17,6 @@ package tensorflow import ( "fmt" - "reflect" "strings" "time" @@ -45,6 +44,7 @@ import ( "github.com/kubeflow/tf-operator/pkg/util/k8sutil" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + apiequality "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/runtime/schema" ) @@ -432,7 +432,12 @@ func (tc *TFController) reconcileTFJobs(tfjob *tfv1.TFJob) error { tfjob.Status.ReplicaStatuses[rtype].Active = 0 } } - return tc.updateStatusHandler(tfjob) + // no need to update the tfjob if the status hasn't changed since last time even the tfjob is not running. + + if !apiequality.Semantic.DeepEqual(*oldStatus, tfjob.Status) { + return tc.updateStatusHandler(tfjob) + } + return nil } if tc.Config.EnableGangScheduling { @@ -463,7 +468,7 @@ func (tc *TFController) reconcileTFJobs(tfjob *tfv1.TFJob) error { } // no need to update the tfjob if the status hasn't changed since last time. - if !reflect.DeepEqual(*oldStatus, tfjob.Status) { + if !apiequality.Semantic.DeepEqual(*oldStatus, tfjob.Status) { return tc.updateStatusHandler(tfjob) } return nil diff --git a/pkg/controller.v1/tensorflow/status.go b/pkg/controller.v1/tensorflow/status.go index 88620e501e..a9df47786b 100644 --- a/pkg/controller.v1/tensorflow/status.go +++ b/pkg/controller.v1/tensorflow/status.go @@ -172,6 +172,11 @@ func (tc *TFController) updateStatusSingle(tfjob *tfv1.TFJob, rtype tfv1.TFRepli // updateTFJobStatus updates the status of the given TFJob. func (tc *TFController) updateTFJobStatus(tfjob *tfv1.TFJob) error { + startTime := time.Now() + defer func() { + tflogger.LoggerForJob(tfjob).Infof("Finished updating TFJobs Status %q (%v)", + tfjob.Name, time.Since(startTime)) + }() _, err := tc.tfJobClientSet.KubeflowV1().TFJobs(tfjob.Namespace).UpdateStatus(tfjob) return err } diff --git a/pkg/controller.v1beta2/tensorflow/controller.go b/pkg/controller.v1beta2/tensorflow/controller.go index 43d1d7e134..ed53d2a266 100644 --- a/pkg/controller.v1beta2/tensorflow/controller.go +++ b/pkg/controller.v1beta2/tensorflow/controller.go @@ -17,7 +17,6 @@ package tensorflow import ( "fmt" - "reflect" "strings" "time" @@ -43,6 +42,7 @@ import ( "github.com/kubeflow/tf-operator/pkg/common/jobcontroller" tflogger "github.com/kubeflow/tf-operator/pkg/logger" "github.com/kubeflow/tf-operator/pkg/util/k8sutil" + apiequality "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/runtime/schema" ) @@ -423,7 +423,10 @@ func (tc *TFController) reconcileTFJobs(tfjob *tfv1beta2.TFJob) error { tfjob.Status.ReplicaStatuses[rtype].Active = 0 } } - return tc.updateStatusHandler(tfjob) + // no need to update the tfjob if the status hasn't changed since last time even the tfjob is not running. + if !apiequality.Semantic.DeepEqual(*oldStatus, tfjob.Status) { + return tc.updateStatusHandler(tfjob) + } } if tc.Config.EnableGangScheduling { @@ -454,7 +457,7 @@ func (tc *TFController) reconcileTFJobs(tfjob *tfv1beta2.TFJob) error { } // no need to update the tfjob if the status hasn't changed since last time. - if !reflect.DeepEqual(*oldStatus, tfjob.Status) { + if !apiequality.Semantic.DeepEqual(*oldStatus, tfjob.Status) { return tc.updateStatusHandler(tfjob) } return nil diff --git a/pkg/controller.v1beta2/tensorflow/status.go b/pkg/controller.v1beta2/tensorflow/status.go index dd7d1ace43..dbedac28d5 100644 --- a/pkg/controller.v1beta2/tensorflow/status.go +++ b/pkg/controller.v1beta2/tensorflow/status.go @@ -150,6 +150,11 @@ func (tc *TFController) updateStatusSingle(tfjob *tfv1beta2.TFJob, rtype tfv1bet // updateTFJobStatus updates the status of the given TFJob. func (tc *TFController) updateTFJobStatus(tfjob *tfv1beta2.TFJob) error { + startTime := time.Now() + defer func() { + tflogger.LoggerForJob(tfjob).Infof("Finished updating TFJobs Status %q (%v)", + tfjob.Name, time.Since(startTime)) + }() _, err := tc.tfJobClientSet.KubeflowV1beta2().TFJobs(tfjob.Namespace).UpdateStatus(tfjob) return err }