-
Notifications
You must be signed in to change notification settings - Fork 710
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix a bunch of problems in TfJob CRD that crept in while tests were broken (#308)
* In syncTfJob, when checking whether a work queue item corresponds to a TrainingJob already in the map, we need to check the UID. Otherwise we will not properly handle the case where a training job is deleted and then a new job is recreated with the same name.
* We need to make sure that the Replicas field in TrainingJob is always properly set.
* We were only initializing replicas in setup, which was problematic when the TfJob controller gets restarted: on restart, setup won't be invoked because the job is past that phase, and as a result the replicas won't be reinitialized.
* test_runner needs to ignore case when checking whether the job succeeded; otherwise we conclude that successful jobs failed.
* The controller should only forget about a job after the job has been cleaned up, not when it is marked as succeeded or failed.
* Add back code to support termination policies that use the worker and not the master as the chief.
* This was added in #221 and accidentally removed in the refactor in #234.
- Loading branch information
Showing
8 changed files
with
266 additions
and
102 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
package validation | ||
|
||
import ( | ||
"testing" | ||
|
||
tfv1 "github.com/tensorflow/k8s/pkg/apis/tensorflow/v1alpha1" | ||
|
||
"github.com/gogo/protobuf/proto" | ||
"k8s.io/api/core/v1" | ||
) | ||
|
||
func TestValidate(t *testing.T) { | ||
type testCase struct { | ||
in *tfv1.TfJobSpec | ||
expectingError bool | ||
} | ||
|
||
testCases := []testCase{ | ||
{ | ||
in: &tfv1.TfJobSpec{ | ||
ReplicaSpecs: []*tfv1.TfReplicaSpec{ | ||
{ | ||
Template: &v1.PodTemplateSpec{ | ||
Spec: v1.PodSpec{ | ||
Containers: []v1.Container{ | ||
{ | ||
Name: "tensorflow", | ||
}, | ||
}, | ||
}, | ||
}, | ||
TfReplicaType: tfv1.MASTER, | ||
Replicas: proto.Int32(1), | ||
}, | ||
}, | ||
TfImage: "tensorflow/tensorflow:1.3.0", | ||
}, | ||
expectingError: false, | ||
}, | ||
{ | ||
in: &tfv1.TfJobSpec{ | ||
ReplicaSpecs: []*tfv1.TfReplicaSpec{ | ||
{ | ||
Template: &v1.PodTemplateSpec{ | ||
Spec: v1.PodSpec{ | ||
Containers: []v1.Container{ | ||
{ | ||
Name: "tensorflow", | ||
}, | ||
}, | ||
}, | ||
}, | ||
TfReplicaType: tfv1.WORKER, | ||
Replicas: proto.Int32(1), | ||
}, | ||
}, | ||
TfImage: "tensorflow/tensorflow:1.3.0", | ||
}, | ||
expectingError: true, | ||
}, | ||
{ | ||
in: &tfv1.TfJobSpec{ | ||
ReplicaSpecs: []*tfv1.TfReplicaSpec{ | ||
{ | ||
Template: &v1.PodTemplateSpec{ | ||
Spec: v1.PodSpec{ | ||
Containers: []v1.Container{ | ||
{ | ||
Name: "tensorflow", | ||
}, | ||
}, | ||
}, | ||
}, | ||
TfReplicaType: tfv1.WORKER, | ||
Replicas: proto.Int32(1), | ||
}, | ||
}, | ||
TfImage: "tensorflow/tensorflow:1.3.0", | ||
TerminationPolicy: &tfv1.TerminationPolicySpec{ | ||
Chief: &tfv1.ChiefSpec{ | ||
ReplicaName: "WORKER", | ||
ReplicaIndex: 0, | ||
}, | ||
}, | ||
}, | ||
expectingError: false, | ||
}, | ||
} | ||
|
||
for _, c := range testCases { | ||
job := &tfv1.TfJob{ | ||
Spec: *c.in, | ||
} | ||
tfv1.SetObjectDefaults_TfJob(job) | ||
if err := ValidateTfJobSpec(&job.Spec); (err != nil) != c.expectingError { | ||
t.Errorf("unexpected validation result: %v", err) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.