From 0b7c1a4d8dd0e6e16387261834bb323d83593c2f Mon Sep 17 00:00:00 2001 From: avelichk Date: Wed, 6 Oct 2021 11:43:47 +0100 Subject: [PATCH 1/9] Update links and corresponding files for training-operator repository Use Go 1.17 --- PROJECT | 12 +-- README.md | 37 ++++---- cmd/training-operator.v1/main.go | 10 +-- docs/api/mxnet_generated.asciidoc | 24 ++--- docs/api/pytorch_generated.asciidoc | 18 ++-- docs/api/tensorflow_generated.asciidoc | 18 ++-- docs/api/xgboost_generated.asciidoc | 18 ++-- docs/design/tf_job_design_doc.md | 6 +- docs/development/developer_guide.md | 6 +- docs/monitoring/README.md | 2 +- docs/release/release.py | 2 +- docs/release/releasing.md | 6 +- docs/roadmap.md | 10 +-- docs/testing/e2e_testing.md | 10 +-- go.mod | 7 +- go.sum | 3 - hack/python-sdk/main.go | 16 ++-- hack/scripts/update-changelog.sh | 2 +- hack/update-codegen.sh | 87 +++++++++---------- pkg/apis/mxnet/v1/openapi_generated.go | 28 +++--- pkg/apis/mxnet/validation/validation.go | 2 +- pkg/apis/mxnet/validation/validation_test.go | 2 +- pkg/apis/pytorch/v1/openapi_generated.go | 26 +++--- pkg/apis/pytorch/validation/validation.go | 2 +- .../pytorch/validation/validation_test.go | 2 +- pkg/apis/tensorflow/v1/openapi_generated.go | 26 +++--- pkg/apis/tensorflow/validation/validation.go | 2 +- .../tensorflow/validation/validation_test.go | 2 +- pkg/apis/xgboost/v1/openapi_generated.go | 26 +++--- pkg/apis/xgboost/validation/validation.go | 2 +- .../xgboost/validation/validation_test.go | 2 +- pkg/client/clientset/versioned/clientset.go | 2 +- .../versioned/fake/clientset_generated.go | 6 +- .../clientset/versioned/fake/register.go | 2 +- .../clientset/versioned/scheme/register.go | 2 +- .../v1/fake/fake_tensorflow_client.go | 2 +- .../typed/tensorflow/v1/fake/fake_tfjob.go | 2 +- .../typed/tensorflow/v1/tensorflow_client.go | 4 +- .../versioned/typed/tensorflow/v1/tfjob.go | 4 +- .../informers/externalversions/factory.go | 6 +- .../informers/externalversions/generic.go | 2 +- 
.../internalinterfaces/factory_interfaces.go | 2 +- .../externalversions/tensorflow/interface.go | 4 +- .../tensorflow/v1/interface.go | 2 +- .../externalversions/tensorflow/v1/tfjob.go | 8 +- pkg/client/listers/tensorflow/v1/tfjob.go | 2 +- pkg/common/util/v1/testutil/const.go | 2 +- pkg/common/util/v1/testutil/pod.go | 2 +- pkg/common/util/v1/testutil/service.go | 2 +- pkg/common/util/v1/testutil/tfjob.go | 2 +- pkg/common/util/v1/testutil/util.go | 4 +- pkg/common/util/v1/unstructured/informer.go | 6 +- pkg/controller.v1/mxnet/mxjob_controller.go | 8 +- pkg/controller.v1/mxnet/mxnet.go | 2 +- pkg/controller.v1/mxnet/suite_test.go | 2 +- pkg/controller.v1/pytorch/pytorch.go | 2 +- .../pytorch/pytorchjob_controller.go | 8 +- pkg/controller.v1/pytorch/suite_test.go | 2 +- pkg/controller.v1/register_controller.go | 16 ++-- pkg/controller.v1/tensorflow/suite_test.go | 2 +- pkg/controller.v1/tensorflow/tensorflow.go | 2 +- .../tensorflow/tfjob_controller.go | 10 +-- pkg/controller.v1/tensorflow/util.go | 4 +- pkg/controller.v1/xgboost/suite_test.go | 2 +- pkg/controller.v1/xgboost/xgboost.go | 2 +- .../xgboost/xgboostjob_controller.go | 8 +- py/kubeflow/tf_operator/prow.py | 2 +- py/kubeflow/tf_operator/prow_test.py | 2 +- py/kubeflow/tf_operator/release.py | 16 ++-- py/kubeflow/tf_operator/release_test.py | 10 +-- py/kubeflow/tf_operator/simple_tfjob_tests.py | 2 +- py/kubeflow/tf_operator/util.py | 2 +- scripts/setup-tf-operator.sh | 7 +- sdk/python/.openapi-generator/VERSION | 1 + sdk/python/setup.py | 2 +- submit_release_job.sh | 15 ++-- test/test-app/components/params.libsonnet | 2 +- test/test-server/Makefile | 2 +- .../environments/releasing/params.libsonnet | 6 +- .../environments/test/params.libsonnet | 4 +- third_party/library/license.txt | 2 +- third_party_licenses/dep.txt | 2 +- third_party_licenses/dep_repo.manual.csv | 2 +- third_party_licenses/license_info.csv | 2 +- third_party_licenses/repo.txt | 2 +- vendor.go | 9 +- 86 files changed, 326 
insertions(+), 320 deletions(-) create mode 100644 sdk/python/.openapi-generator/VERSION diff --git a/PROJECT b/PROJECT index e8589d0379..7335774ed6 100644 --- a/PROJECT +++ b/PROJECT @@ -1,7 +1,7 @@ layout: - go.kubebuilder.io/v3 -projectName: tf-operator -repo: github.com/kubeflow/tf-operator +projectName: training-operator +repo: github.com/kubeflow/training-operator resources: - api: crdVersion: v1 @@ -9,7 +9,7 @@ resources: controller: true group: kubeflow.org kind: XGBoostJob - path: github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1 + path: github.com/kubeflow/training-operator/pkg/apis/xgboost/v1 version: v1 - api: crdVersion: v1 @@ -17,7 +17,7 @@ resources: controller: true group: kubeflow.org kind: PyTorchJob - path: github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1 + path: github.com/kubeflow/training-operator/pkg/apis/pytorch/v1 version: v1 - api: crdVersion: v1 @@ -25,7 +25,7 @@ resources: controller: true group: kubeflow.org kind: TFJob - path: github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 + path: github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1 version: v1 - api: crdVersion: v1 @@ -33,6 +33,6 @@ resources: controller: true group: kubeflow.org kind: MXJob - path: github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1 + path: github.com/kubeflow/training-operator/pkg/apis/mxnet/v1 version: v1 version: "3" diff --git a/README.md b/README.md index 1f4eee6a6c..a43e205251 100644 --- a/README.md +++ b/README.md @@ -22,22 +22,22 @@ run distributed or non-distributed TensorFlow/PyTorch/MXNet/XGBoost jobs on Kube ## Prerequisites -* Version >= 1.16 of Kubernetes -* Version >= 3.x of Kustomize -* Version >= 1.21.x of Kubectl +- Version >= 1.16 of Kubernetes +- Version >= 3.x of Kustomize +- Version >= 1.21.x of Kubectl ## Installation ### Master Branch ```bash -kubectl apply -k "github.com/kubeflow/tf-operator/manifests/overlays/standalone" +kubectl apply -k "github.com/kubeflow/training-operator/manifests/overlays/standalone" ``` ### Stable 
Release ```bash -kubectl apply -k "github.com/kubeflow/tf-operator/manifests/overlays/standalone?ref=v1.3.0" +kubectl apply -k "github.com/kubeflow/training-operator/manifests/overlays/standalone?ref=v1.3.0" ``` ### Tensorflow Release Only @@ -45,7 +45,7 @@ kubectl apply -k "github.com/kubeflow/tf-operator/manifests/overlays/standalone? For users who prefer to use original tensorflow controllers, please checkout v1.2-branch, we will maintain the bug fix in this branch. ```bash -kubectl apply -k "github.com/kubeflow/tf-operator/manifests/overlays/standalone?ref=v1.2.0" +kubectl apply -k "github.com/kubeflow/training-operator/manifests/overlays/standalone?ref=v1.2.0" ``` ### Python SDK for Kubeflow Training Operator @@ -53,6 +53,7 @@ kubectl apply -k "github.com/kubeflow/tf-operator/manifests/overlays/standalone? Training Operator provides Python SDK for the custom resources. More docs are available in [sdk/python](sdk/python) folder. Use `pip install` command to install the latest release of the SDK: + ``` pip install kubeflow-training ``` @@ -64,6 +65,7 @@ Please refer to the [quick-start-v1.md](docs/quick-start-v1.md) and [Kubeflow Tr ## API Documentation Please refer to API Documentation. + - [Tensorflow API Documentation](docs/api/tensorflow_generated.asciidoc) - [PyTorch API Documentation](docs/api/pytorch_generated.asciidoc) - [MXNet API Documentation](docs/api/mxnet_generated.asciidoc) @@ -82,7 +84,6 @@ This is a part of Kubeflow, so please see [readme in kubeflow/kubeflow](https:// Please refer to the [DEVELOPMENT](docs/development/developer_guide.md) - ## Change Log Please refer to [CHANGELOG](CHANGELOG.md) @@ -91,19 +92,19 @@ Please refer to [CHANGELOG](CHANGELOG.md) The following table lists the most recent few versions of the operator. 
-| Operator Version | API Version | Kubernetes Version | -| ------------- | ------------- | ------------- | -| `v1.0.x`| `v1` | 1.16+ | -| `v1.1.x`| `v1` | 1.16+ | -| `v1.2.x`| `v1` | 1.16+ | -| `v1.3.x`| `v1` | 1.18+ | -| `latest` (master HEAD) | `v1` | 1.18+ | +| Operator Version | API Version | Kubernetes Version | +| ---------------------- | ----------- | ------------------ | +| `v1.0.x` | `v1` | 1.16+ | +| `v1.1.x` | `v1` | 1.16+ | +| `v1.2.x` | `v1` | 1.16+ | +| `v1.3.x` | `v1` | 1.18+ | +| `latest` (master HEAD) | `v1` | 1.18+ | ## Acknowledgement This project was originally started as a distributed training operator for TensorFlow and later we merged efforts from other Kubeflow training operators to provide a unified and simplified experience for both users and developers. We are very grateful to all who filed issues or helped resolve them, asked and answered questions, and were part of inspiring discussions. We'd also like to thank everyone who's contributed to and maintained the original operators. -* PyTorch Operator: [list of contributors](https://github.com/kubeflow/pytorch-operator/graphs/contributors) and [maintainers](https://github.com/kubeflow/pytorch-operator/blob/master/OWNERS). -* MPI Operator: [list of contributors](https://github.com/kubeflow/mpi-operator/graphs/contributors) and [maintainers](https://github.com/kubeflow/mpi-operator/blob/master/OWNERS). -* XGBoost Operator: [list of contributors](https://github.com/kubeflow/xgboost-operator/graphs/contributors) and [maintainers](https://github.com/kubeflow/xgboost-operator/blob/master/OWNERS). -* MXNet Operator: [list of contributors](https://github.com/kubeflow/mxnet-operator/graphs/contributors) and [maintainers](https://github.com/kubeflow/mxnet-operator/blob/master/OWNERS). +- PyTorch Operator: [list of contributors](https://github.com/kubeflow/pytorch-operator/graphs/contributors) and [maintainers](https://github.com/kubeflow/pytorch-operator/blob/master/OWNERS). 
+- MPI Operator: [list of contributors](https://github.com/kubeflow/mpi-operator/graphs/contributors) and [maintainers](https://github.com/kubeflow/mpi-operator/blob/master/OWNERS). +- XGBoost Operator: [list of contributors](https://github.com/kubeflow/xgboost-operator/graphs/contributors) and [maintainers](https://github.com/kubeflow/xgboost-operator/blob/master/OWNERS). +- MXNet Operator: [list of contributors](https://github.com/kubeflow/mxnet-operator/graphs/contributors) and [maintainers](https://github.com/kubeflow/mxnet-operator/blob/master/OWNERS). diff --git a/cmd/training-operator.v1/main.go b/cmd/training-operator.v1/main.go index e108846b49..f9d5303c92 100644 --- a/cmd/training-operator.v1/main.go +++ b/cmd/training-operator.v1/main.go @@ -21,7 +21,7 @@ import ( "fmt" "os" - controller_v1 "github.com/kubeflow/tf-operator/pkg/controller.v1" + controller_v1 "github.com/kubeflow/training-operator/pkg/controller.v1" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. 
@@ -33,10 +33,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" - mxnetv1 "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1" - pytorchv1 "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1" - tensorflowv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" - xgboostv1 "github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1" + mxnetv1 "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1" + pytorchv1 "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1" + tensorflowv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" + xgboostv1 "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1" //+kubebuilder:scaffold:imports ) diff --git a/docs/api/mxnet_generated.asciidoc b/docs/api/mxnet_generated.asciidoc index ce44852a59..015e470a51 100644 --- a/docs/api/mxnet_generated.asciidoc +++ b/docs/api/mxnet_generated.asciidoc @@ -16,32 +16,32 @@ Package v1 is the v1 version of the API. Package v1 contains API Schema definitions for the kubeflow.org v1 API group .Resource Types -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-mxjob[$$MXJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-mxjoblist[$$MXJobList$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-mxjob[$$MXJob$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-mxjoblist[$$MXJobList$$] === Definitions -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-jobmodetype"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-jobmodetype"] ==== JobModeType (string) JobModeType id the type for JobMode .Appears In: **** -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-mxjobspec[$$MXJobSpec$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-mxjobspec[$$MXJobSpec$$] **** 
-[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-mxjob"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-mxjob"] ==== MXJob MXJob is the Schema for the mxjobs API .Appears In: **** -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-mxjoblist[$$MXJobList$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-mxjoblist[$$MXJobList$$] **** [cols="25a,75a", options="header"] @@ -52,12 +52,12 @@ MXJob is the Schema for the mxjobs API | *`TypeMeta`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#typemeta-v1-meta[$$TypeMeta$$]__ | | *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#objectmeta-v1-meta[$$ObjectMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. -| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-mxjobspec[$$MXJobSpec$$]__ | +| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-mxjobspec[$$MXJobSpec$$]__ | | *`status`* __xref:{anchor_prefix}-github-com-kubeflow-common-pkg-apis-common-v1-jobstatus[$$JobStatus$$]__ | |=== -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-mxjoblist"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-mxjoblist"] ==== MXJobList MXJobList contains a list of MXJob @@ -72,25 +72,25 @@ MXJobList contains a list of MXJob | *`TypeMeta`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#typemeta-v1-meta[$$TypeMeta$$]__ | | *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#listmeta-v1-meta[$$ListMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. 
-| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-mxjob[$$MXJob$$]__ | +| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-mxjob[$$MXJob$$]__ | |=== -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-mxjobspec"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-mxjobspec"] ==== MXJobSpec MXJobSpec defines the desired state of MXJob .Appears In: **** -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-mxjob[$$MXJob$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-mxjob[$$MXJob$$] **** [cols="25a,75a", options="header"] |=== | Field | Description | *`runPolicy`* __xref:{anchor_prefix}-github-com-kubeflow-common-pkg-apis-common-v1-runpolicy[$$RunPolicy$$]__ | RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active. -| *`jobMode`* __xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-mxnet-v1-jobmodetype[$$JobModeType$$]__ | JobMode specify the kind of MXjob to do. Different mode may have different MXReplicaSpecs request +| *`jobMode`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-mxnet-v1-jobmodetype[$$JobModeType$$]__ | JobMode specify the kind of MXjob to do. Different mode may have different MXReplicaSpecs request | *`mxReplicaSpecs`* __object (keys:ReplicaType, values:ReplicaSpec)__ | MXReplicaSpecs is map of common.ReplicaType and common.ReplicaSpec specifies the MX replicas to run. 
For example, { "Scheduler": common.ReplicaSpec, "Server": common.ReplicaSpec, "Worker": common.ReplicaSpec, } |=== diff --git a/docs/api/pytorch_generated.asciidoc b/docs/api/pytorch_generated.asciidoc index ba97b925d7..c5f8fa8203 100644 --- a/docs/api/pytorch_generated.asciidoc +++ b/docs/api/pytorch_generated.asciidoc @@ -16,20 +16,20 @@ Package v1 is the v1 version of the API. Package v1 contains API Schema definitions for the kubeflow.org v1 API group .Resource Types -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-pytorch-v1-pytorchjob[$$PyTorchJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-pytorch-v1-pytorchjoblist[$$PyTorchJobList$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-pytorch-v1-pytorchjob[$$PyTorchJob$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-pytorch-v1-pytorchjoblist[$$PyTorchJobList$$] === Definitions -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-pytorch-v1-pytorchjob"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-pytorch-v1-pytorchjob"] ==== PyTorchJob PyTorchJob Represents a PyTorchJob resource. .Appears In: **** -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-pytorch-v1-pytorchjoblist[$$PyTorchJobList$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-pytorch-v1-pytorchjoblist[$$PyTorchJobList$$] **** [cols="25a,75a", options="header"] @@ -40,12 +40,12 @@ PyTorchJob Represents a PyTorchJob resource. | *`TypeMeta`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#typemeta-v1-meta[$$TypeMeta$$]__ | Standard Kubernetes type metadata. | *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#objectmeta-v1-meta[$$ObjectMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. 
-| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-pytorch-v1-pytorchjobspec[$$PyTorchJobSpec$$]__ | Specification of the desired state of the PyTorchJob. +| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-pytorch-v1-pytorchjobspec[$$PyTorchJobSpec$$]__ | Specification of the desired state of the PyTorchJob. | *`status`* __xref:{anchor_prefix}-github-com-kubeflow-common-pkg-apis-common-v1-jobstatus[$$JobStatus$$]__ | Most recently observed status of the PyTorchJob. Read-only (modified by the system). |=== -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-pytorch-v1-pytorchjoblist"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-pytorch-v1-pytorchjoblist"] ==== PyTorchJobList PyTorchJobList is a list of PyTorchJobs. @@ -60,18 +60,18 @@ PyTorchJobList is a list of PyTorchJobs. | *`TypeMeta`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#typemeta-v1-meta[$$TypeMeta$$]__ | Standard type metadata. | *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#listmeta-v1-meta[$$ListMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. -| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-pytorch-v1-pytorchjob[$$PyTorchJob$$]__ | List of PyTorchJobs. +| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-pytorch-v1-pytorchjob[$$PyTorchJob$$]__ | List of PyTorchJobs. |=== -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-pytorch-v1-pytorchjobspec"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-pytorch-v1-pytorchjobspec"] ==== PyTorchJobSpec PyTorchJobSpec is a desired state description of the PyTorchJob. 
.Appears In: **** -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-pytorch-v1-pytorchjob[$$PyTorchJob$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-pytorch-v1-pytorchjob[$$PyTorchJob$$] **** [cols="25a,75a", options="header"] diff --git a/docs/api/tensorflow_generated.asciidoc b/docs/api/tensorflow_generated.asciidoc index abeed19c1c..89e5da3e0d 100644 --- a/docs/api/tensorflow_generated.asciidoc +++ b/docs/api/tensorflow_generated.asciidoc @@ -16,20 +16,20 @@ Package v1 is the v1 version of the API. Package v1 contains API Schema definitions for the kubeflow.org v1 API group .Resource Types -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-tensorflow-v1-tfjob[$$TFJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-tensorflow-v1-tfjoblist[$$TFJobList$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-tensorflow-v1-tfjob[$$TFJob$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-tensorflow-v1-tfjoblist[$$TFJobList$$] === Definitions -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-tensorflow-v1-tfjob"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-tensorflow-v1-tfjob"] ==== TFJob TFJob represents a TFJob resource. .Appears In: **** -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-tensorflow-v1-tfjoblist[$$TFJobList$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-tensorflow-v1-tfjoblist[$$TFJobList$$] **** [cols="25a,75a", options="header"] @@ -40,12 +40,12 @@ TFJob represents a TFJob resource. | *`TypeMeta`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#typemeta-v1-meta[$$TypeMeta$$]__ | Standard Kubernetes type metadata. | *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#objectmeta-v1-meta[$$ObjectMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. 
-| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-tensorflow-v1-tfjobspec[$$TFJobSpec$$]__ | Specification of the desired state of the TFJob. +| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-tensorflow-v1-tfjobspec[$$TFJobSpec$$]__ | Specification of the desired state of the TFJob. | *`status`* __xref:{anchor_prefix}-github-com-kubeflow-common-pkg-apis-common-v1-jobstatus[$$JobStatus$$]__ | Most recently observed status of the TFJob. Populated by the system. Read-only. |=== -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-tensorflow-v1-tfjoblist"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-tensorflow-v1-tfjoblist"] ==== TFJobList TFJobList is a list of TFJobs. @@ -60,18 +60,18 @@ TFJobList is a list of TFJobs. | *`TypeMeta`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#typemeta-v1-meta[$$TypeMeta$$]__ | Standard type metadata. | *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#listmeta-v1-meta[$$ListMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. -| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-tensorflow-v1-tfjob[$$TFJob$$]__ | List of TFJobs. +| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-tensorflow-v1-tfjob[$$TFJob$$]__ | List of TFJobs. |=== -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-tensorflow-v1-tfjobspec"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-tensorflow-v1-tfjobspec"] ==== TFJobSpec TFJobSpec is a desired state description of the TFJob. 
.Appears In: **** -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-tensorflow-v1-tfjob[$$TFJob$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-tensorflow-v1-tfjob[$$TFJob$$] **** [cols="25a,75a", options="header"] diff --git a/docs/api/xgboost_generated.asciidoc b/docs/api/xgboost_generated.asciidoc index f8860d26f4..69c881e7cc 100644 --- a/docs/api/xgboost_generated.asciidoc +++ b/docs/api/xgboost_generated.asciidoc @@ -16,20 +16,20 @@ Package v1 is the v1 version of the API. Package v1 contains API Schema definitions for the kubeflow.org v1 API group .Resource Types -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-xgboost-v1-xgboostjob[$$XGBoostJob$$] -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-xgboost-v1-xgboostjoblist[$$XGBoostJobList$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-xgboost-v1-xgboostjob[$$XGBoostJob$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-xgboost-v1-xgboostjoblist[$$XGBoostJobList$$] === Definitions -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-xgboost-v1-xgboostjob"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-xgboost-v1-xgboostjob"] ==== XGBoostJob XGBoostJob is the Schema for the xgboostjobs API .Appears In: **** -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-xgboost-v1-xgboostjoblist[$$XGBoostJobList$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-xgboost-v1-xgboostjoblist[$$XGBoostJobList$$] **** [cols="25a,75a", options="header"] @@ -40,12 +40,12 @@ XGBoostJob is the Schema for the xgboostjobs API | *`TypeMeta`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#typemeta-v1-meta[$$TypeMeta$$]__ | | *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#objectmeta-v1-meta[$$ObjectMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. 
-| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-xgboost-v1-xgboostjobspec[$$XGBoostJobSpec$$]__ | +| *`spec`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-xgboost-v1-xgboostjobspec[$$XGBoostJobSpec$$]__ | | *`status`* __xref:{anchor_prefix}-github-com-kubeflow-common-pkg-apis-common-v1-jobstatus[$$JobStatus$$]__ | |=== -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-xgboost-v1-xgboostjoblist"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-xgboost-v1-xgboostjoblist"] ==== XGBoostJobList XGBoostJobList contains a list of XGBoostJob @@ -60,18 +60,18 @@ XGBoostJobList contains a list of XGBoostJob | *`TypeMeta`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#typemeta-v1-meta[$$TypeMeta$$]__ | | *`metadata`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#listmeta-v1-meta[$$ListMeta$$]__ | Refer to Kubernetes API documentation for fields of `metadata`. 
-| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-xgboost-v1-xgboostjob[$$XGBoostJob$$]__ | +| *`items`* __xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-xgboost-v1-xgboostjob[$$XGBoostJob$$]__ | |=== -[id="{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-xgboost-v1-xgboostjobspec"] +[id="{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-xgboost-v1-xgboostjobspec"] ==== XGBoostJobSpec XGBoostJobSpec defines the desired state of XGBoostJob .Appears In: **** -- xref:{anchor_prefix}-github-com-kubeflow-tf-operator-pkg-apis-xgboost-v1-xgboostjob[$$XGBoostJob$$] +- xref:{anchor_prefix}-github-com-kubeflow-training-operator-pkg-apis-xgboost-v1-xgboostjob[$$XGBoostJob$$] **** [cols="25a,75a", options="header"] diff --git a/docs/design/tf_job_design_doc.md b/docs/design/tf_job_design_doc.md index 4566007c07..0490aeb48f 100644 --- a/docs/design/tf_job_design_doc.md +++ b/docs/design/tf_job_design_doc.md @@ -32,7 +32,7 @@ The input from the K8s team that developed CRDs and various controllers is that ## TFJob Resource The TFJob CRD defines a TFJob resource for K8s. -The [TFJob](https://github.com/kubeflow/tf-operator/blob/master/pkg/spec/tf_job.go#L55) +The [TFJob](https://github.com/kubeflow/training-operator/blob/master/pkg/apis/tensorflow/v1/types.go#L29) resource is a collection of TfReplicas. Each TfReplica corresponds to a set of TensorFlow processes performing a role in the job; e.g. master, parameter server or worker. The set of replica types can be expanded (it is just an enum) to support new TF patterns such as eval workers. Figure 1. shows an example yaml spec for a distributed job. @@ -76,14 +76,14 @@ As illustrated by Fig 1, I made an explicit decision not to try to hide or repla The controller can be used to configure defaults for TFJob to create a simpler user experience. The most common use for this right now is supporting GPUs. 
To use GPUs, the NVIDIA drivers and libraries need to be mounted from the host into the container. This step should become unnecessary with Kubernetes 1.8. The TFJob controller will automatically add these volume mounts based on configuration specified when the controller is started. This prevents users from having to specify them for each job. Instead, only the cluster administrator who deploys the TFJob controller needs to know how the volumes should be configured. -Another use case is minimizing the boilerplate users have to write to run standard processes (e.g. [Parameter Servers](https://github.com/kubeflow/tf-operator/pull/36#discussion_r141135711)) using official TF Docker images. +Another use case is minimizing the boilerplate users have to write to run standard processes (e.g. [Parameter Servers](https://github.com/kubeflow/training-operator/pull/36#discussion_r141135711)) using official TF Docker images. ## Controller The controller manages a distributed TFJob by creating a series of Job controllers Fig 2. The TFJob controller sets the environment variable TF_CONFIG to make the TensorFlow cluster spec and replica type (PS, WORKER, MASTER) and replica index available to TensorFlow code. The Job controller takes care of restarting TensorFlow processes that terminate due to an error. Additional logic in the TFJob controller looks at exit codes and fails the job if a TF process exits with an exit code indicating a permanent error. The TFJob controller treats exit codes of 1-127 as permanent errors; this is an arbitrary convention. -When the master exits successfully or with a permanent error the job is considered finished. There is an open issue([issues/61](https://github.com/kubeflow/tf-operator/issues/61)) to make the changes necessary to support evaluation with the Estimator API in 1.4. The pods aren't deleted until the TFJob is deleted. This allows the logs to be fetched via kubectl logs. 
+When the master exits successfully or with a permanent error the job is considered finished. There is an open issue([issues/61](https://github.com/kubeflow/training-operator/issues/61)) to make the changes necessary to support evaluation with the Estimator API in 1.4. The pods aren't deleted until the TFJob is deleted. This allows the logs to be fetched via kubectl logs. ![Resources for TFJob](docs/diagrams/tfjob_k8s_resources.svg) diff --git a/docs/development/developer_guide.md b/docs/development/developer_guide.md index 67c86ec104..e70a50dfaf 100644 --- a/docs/development/developer_guide.md +++ b/docs/development/developer_guide.md @@ -12,10 +12,10 @@ Create a symbolic link inside your GOPATH to the location you checked out the co ```sh mkdir -p ${go env GOPATH}/src/github.com/kubeflow -ln -sf ${GIT_TRAINING} ${go env GOPATH}/src/github.com/kubeflow/tf-operator +ln -sf ${GIT_TRAINING} ${go env GOPATH}/src/github.com/kubeflow/training-operator ``` -* GIT_TRAINING should be the location where you checked out https://github.com/kubeflow/tf-operator +* GIT_TRAINING should be the location where you checked out https://github.com/kubeflow/training-operator Install dependencies @@ -26,7 +26,7 @@ go mod vendor Build it ```sh -go install github.com/kubeflow/tf-operator/cmd/training-operator.v1 +go install github.com/kubeflow/training-operator/cmd/training-operator.v1 ``` ## Running the Operator Locally diff --git a/docs/monitoring/README.md b/docs/monitoring/README.md index 8f0e019ec5..68a80d1a63 100644 --- a/docs/monitoring/README.md +++ b/docs/monitoring/README.md @@ -58,7 +58,7 @@ tf_operator_is_leader ### Report TFJob metrics: -*Note*: If you are using release v1 tf-operator, these TFJob metrics don't have suffix `total`. So you have to use metric name like `tf_operator_jobs_created` to get your metrics. See [PR](https://github.com/kubeflow/tf-operator/pull/1055) to get more information. 
+*Note*: If you are using release v1 tf-operator, these TFJob metrics don't have suffix `total`. So you have to use metric name like `tf_operator_jobs_created` to get your metrics. See [PR](https://github.com/kubeflow/training-operator/pull/1055) to get more information. **Job Creation** ``` diff --git a/docs/release/release.py b/docs/release/release.py index 0dffd95c4b..a463220102 100644 --- a/docs/release/release.py +++ b/docs/release/release.py @@ -37,7 +37,7 @@ def generate(self, pr_id): 8d179f70 Fix: Remove Github CD workflow (#1263) ''' -g = ChangelogGenerator("kubeflow/tf-operator") +g = ChangelogGenerator("kubeflow/training-operator") for pr_match in re.finditer(r"#(\d+)", payload): pr_id = int(pr_match.group(1)) print("* {}".format(g.generate(pr_id))) diff --git a/docs/release/releasing.md b/docs/release/releasing.md index ced3d7fd0a..cd55d24aae 100644 --- a/docs/release/releasing.md +++ b/docs/release/releasing.md @@ -15,7 +15,7 @@ ### Release Process -1. Make sure the last commit you want to release past `kubeflow-tf-operator-postsubmit` testing. +1. Make sure the last commit you want to release has passed `kubeflow-training-operator-postsubmit` testing. 1. Check out that commit (in this example, we'll use `6214e560`). @@ -90,14 +90,14 @@ 8d179f70 Fix: Remove Github CD workflow (#1263) ''' - g = ChangelogGenerator("kubeflow/tf-operator") + g = ChangelogGenerator("kubeflow/training-operator") for pr_match in re.finditer(r"#(\d+)", payload): pr_id = int(pr_match.group(1)) print("* {}".format(g.generate(pr_id))) ``` 1. Cut release from tags and copy results from last step. You can group commits into `Features`, `Bugs` etc. -See example [v1.2.0 release](https://github.com/kubeflow/tf-operator/releases/tag/v1.2.0) +See example [v1.2.0 release](https://github.com/kubeflow/training-operator/releases/tag/v1.2.0) 1.
Send a PR to update [CHANGELOG.md](../../CHANGELOG.md) \ No newline at end of file diff --git a/docs/roadmap.md b/docs/roadmap.md index d71105f267..381fc49ce9 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -2,12 +2,12 @@ # Q1 & Q2 -- Better log support - - Support log levels [#1132](https://github.com/kubeflow/tf-operator/issues/1132) - - Log errors in events -- Validating webhook [#1016](https://github.com/kubeflow/tf-operator/issues/1016) +- Better log support + - Support log levels [#1132](https://github.com/kubeflow/training-operator/issues/1132) + - Log errors in events +- Validating webhook [#1016](https://github.com/kubeflow/training-operator/issues/1016) # Q3 & Q4 - Better Volcano support - - Support queue [#916](https://github.com/kubeflow/tf-operator/issues/916) \ No newline at end of file + - Support queue [#916](https://github.com/kubeflow/training-operator/issues/916) diff --git a/docs/testing/e2e_testing.md b/docs/testing/e2e_testing.md index 810a3c0b73..14389bac65 100644 --- a/docs/testing/e2e_testing.md +++ b/docs/testing/e2e_testing.md @@ -4,7 +4,7 @@ The E2E tests for TF operator are implemented as Argo workflows. For more backgr about Argo (not required for understanding the rest of this document), please take a look at [this link](https://github.com/kubeflow/testing/blob/master/README.md). -Test results can be monitored at the [Prow dashboard](https://prow.k8s.io/?repo=kubeflow%2Ftf-operator). +Test results can be monitored at the [Prow dashboard](https://prow.k8s.io/?repo=kubeflow%2Ftraining-operator). At a high level, the E2E test suites are structured as Python test classes. Each test class contains one or more tests. A test typically runs the following: @@ -16,7 +16,7 @@ one or more tests. A test typically runs the following: ## Adding a Test Method -An example can be found [here](https://github.com/kubeflow/tf-operator/blob/master/py/kubeflow/tf_operator/simple_tfjob_tests.py). 
+An example can be found [here](https://github.com/kubeflow/training-operator/blob/master/py/kubeflow/tf_operator/simple_tfjob_tests.py). A test class can have several test methods. Each method executes a series of user actions (e.g. starting or deleting a TFJob), and performs verifications of expected results (e.g. TFJob exits with @@ -47,11 +47,11 @@ be added to one of the helper modules: ## Adding a TFJob Spec This is needed if you want to use your own TFJob spec instead of an existing one. An example can be found -[here](https://github.com/kubeflow/tf-operator/tree/master/test/workflows/components/simple_tfjob_v1.jsonnet). +[here](https://github.com/kubeflow/training-operator/tree/master/test/workflows/components/simple_tfjob_v1.jsonnet). All TFJob specs should be placed in the same directory. These are similar to actual TFJob specs. Note that many of these are using the -[tf-operator-test-server](https://github.com/kubeflow/tf-operator/tree/master/test/test-server) as the test image. +[training-operator-test-server](https://github.com/kubeflow/training-operator/tree/master/test/test-server) as the test image. This gives us more control over when each replica exits, and allows us to send specific requests like fetching the runtime TensorFlow config. @@ -61,7 +61,7 @@ This is needed if you are creating a new test class. Creating a new test class i a new feature, and want to group all relevant E2E tests together. New test classes should be added as Argo workflow steps to the -[workflows.libsonnet](https://github.com/kubeflow/tf-operator/blob/master/test/workflows/components/workflows.libsonnet) file. +[workflows.libsonnet](https://github.com/kubeflow/training-operator/blob/master/test/workflows/components/workflows.libsonnet) file. 
Under the templates section, add the following to the dag: ``` diff --git a/go.mod b/go.mod index a82d0b996c..bef4066bc3 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/kubeflow/tf-operator +module github.com/kubeflow/training-operator go 1.17 @@ -82,7 +82,8 @@ require ( gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 // indirect - k8s.io/component-base v0.19.9 // indirect + k8s.io/apiextensions-apiserver v0.19.2 // indirect + k8s.io/component-base v0.19.6 // indirect k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14 // indirect k8s.io/klog/v2 v2.2.0 // indirect k8s.io/utils v0.0.0-20200912215256-4140de9c8800 // indirect diff --git a/go.sum b/go.sum index 19d27472c0..82d9a61ee3 100644 --- a/go.sum +++ b/go.sum @@ -825,8 +825,6 @@ k8s.io/code-generator v0.19.9 h1:nj1gVb/4P4C53hnBtdTaxZDlJ1jEkrQnAy+n4BYGVHs= k8s.io/code-generator v0.19.9/go.mod h1:lwEq3YnLYb/7uVXLorOJfxg+cUu2oihFhHZ0n9NIla0= k8s.io/component-base v0.19.2/go.mod h1:g5LrsiTiabMLZ40AR6Hl45f088DevyGY+cCE2agEIVo= k8s.io/component-base v0.19.6/go.mod h1:8Btsf8J00/fVDa/YFmXjei7gVkcFrlKZXjSeP4SZNJg= -k8s.io/component-base v0.19.9 h1:GOjvFCDgTRfLz6v3xshO0QbqWJN5nAkJzypc2BIfxOw= -k8s.io/component-base v0.19.9/go.mod h1:x9UmpImvXgVry1s9/hINgLz6iGBYUGvy3Xm7KZh1nnI= k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14 h1:t4L10Qfx/p7ASH3gXCdIUtPbbIuegCoUJf3TMSFekjw= k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= @@ -842,7 +840,6 @@ k8s.io/utils v0.0.0-20200912215256-4140de9c8800 h1:9ZNvfPvVIEsp/T1ez4GQuzCcCTEQW k8s.io/utils v0.0.0-20200912215256-4140de9c8800/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= rsc.io/binaryregexp
v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.9/go.mod h1:dzAXnQbTRyDlZPJX2SUPEqvnB+j7AJjtlox7PEwigU0= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.15/go.mod h1:LEScyzhFmoF5pso/YSeBstl57mOzx9xlU9n85RGrDQg= sigs.k8s.io/controller-runtime v0.7.2 h1:gD2JZp0bBLLuvSRYVNvox+bRCz1UUUxKDjPUCb56Ukk= sigs.k8s.io/controller-runtime v0.7.2/go.mod h1:pJ3YBrJiAqMAZKi6UVGuE98ZrroV1p+pIhoHsMm9wdU= sigs.k8s.io/structured-merge-diff/v4 v4.0.1 h1:YXTMot5Qz/X1iBRJhAt+vI+HVttY0WkSqqhKxQ0xVbA= diff --git a/hack/python-sdk/main.go b/hack/python-sdk/main.go index cc1effddeb..7e161fba81 100644 --- a/hack/python-sdk/main.go +++ b/hack/python-sdk/main.go @@ -19,14 +19,14 @@ package main import ( "encoding/json" "fmt" - mxnet "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1" - pytorch "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1" - tensorflow "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" - xgboost "github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1" "os" "strings" "github.com/go-openapi/spec" + mxnet "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1" + pytorch "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1" + tensorflow "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" + xgboost "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1" "k8s.io/klog" "k8s.io/kube-openapi/pkg/common" ) @@ -88,10 +88,10 @@ func main() { } func swaggify(name string) string { - name = strings.Replace(name, "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/", "", -1) - name = strings.Replace(name, "github.com/kubeflow/tf-operator/pkg/apis/pytorch/", "", -1) - name = strings.Replace(name, "github.com/kubeflow/tf-operator/pkg/apis/mxnet/", "", -1) - name = strings.Replace(name, "github.com/kubeflow/tf-operator/pkg/apis/xgboost/", "", -1) + name = strings.Replace(name, "github.com/kubeflow/training-operator/pkg/apis/tensorflow/", "", -1) + name = strings.Replace(name, 
"github.com/kubeflow/training-operator/pkg/apis/pytorch/", "", -1) + name = strings.Replace(name, "github.com/kubeflow/training-operator/pkg/apis/mxnet/", "", -1) + name = strings.Replace(name, "github.com/kubeflow/training-operator/pkg/apis/xgboost/", "", -1) name = strings.Replace(name, "github.com/kubeflow/common/pkg/apis/common/", "", -1) name = strings.Replace(name, "k8s.io/api/core/", "", -1) name = strings.Replace(name, "k8s.io/apimachinery/pkg/apis/meta/", "", -1) diff --git a/hack/scripts/update-changelog.sh b/hack/scripts/update-changelog.sh index 7e3b30d579..b327d8fc09 100755 --- a/hack/scripts/update-changelog.sh +++ b/hack/scripts/update-changelog.sh @@ -35,7 +35,7 @@ then exit 1 fi -github_changelog_generator -t ${GITHUB_TOKEN} -u kubeflow -p tf-operator \ +github_changelog_generator -t ${GITHUB_TOKEN} -u kubeflow -p training-operator \ --exclude-labels community/discussion,cmmunity/question,duplicate,question,invalid,wontfix \ --bug-labels kind/bug,problems/bug \ --enhancement-labels improvement/optimization,kind/enhancement,improvement/enhancement,addition/feature,kind/feature \ diff --git a/hack/update-codegen.sh b/hack/update-codegen.sh index 5d4aaaedf9..43efcfefa2 100755 --- a/hack/update-codegen.sh +++ b/hack/update-codegen.sh @@ -27,11 +27,11 @@ set -o nounset set -o pipefail SCRIPT_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. -ROOT_PKG=github.com/kubeflow/tf-operator +ROOT_PKG=github.com/kubeflow/training-operator # Grab code-generator version from go.sum -CODEGEN_VERSION=$(grep 'k8s.io/code-generator' go.sum | awk '{print $2}' | sed 's/\/go.mod//g' | head -1) -CODEGEN_PKG=$(echo `go env GOPATH`"/pkg/mod/k8s.io/code-generator@${CODEGEN_VERSION}") +CODEGEN_VERSION=$(grep 'k8s.io/code-generator' go.mod | awk '{print $2}') +CODEGEN_PKG=$(echo $(go env GOPATH)"/pkg/mod/k8s.io/code-generator@${CODEGEN_VERSION}") if [[ ! -d ${CODEGEN_PKG} ]]; then echo "${CODEGEN_PKG} is missing. Running 'go mod download'." 
@@ -41,8 +41,8 @@ fi echo ">> Using ${CODEGEN_PKG}" # Grab openapi-gen version from go.mod -OPENAPI_VERSION=$(grep 'k8s.io/kube-openapi' go.sum | awk '{print $2}' | sed 's/\/go.mod//g' | head -1) -OPENAPI_PKG=$(echo `go env GOPATH`"/pkg/mod/k8s.io/kube-openapi@${OPENAPI_VERSION}") +OPENAPI_VERSION=$(grep 'k8s.io/kube-openapi' go.mod | awk '{print $2}') +OPENAPI_PKG=$(echo $(go env GOPATH)"/pkg/mod/k8s.io/kube-openapi@${OPENAPI_VERSION}") if [[ ! -d ${OPENAPI_PKG} ]]; then echo "${OPENAPI_PKG} is missing. Running 'go mod download'." @@ -67,17 +67,16 @@ echo ">> Temporary output directory ${TEMP_DIR}" # Ensure we can execute. chmod +x ${CODEGEN_PKG}/generate-groups.sh - # generate the code with: # --output-base because this script should also be able to run inside the vendor dir of # k8s.io/kubernetes. The output-base is needed for the generators to output into the vendor dir # instead of the $GOPATH directly. For normal projects this can be dropped. cd ${SCRIPT_ROOT} ${CODEGEN_PKG}/generate-groups.sh "all" \ - github.com/kubeflow/tf-operator/pkg/client github.com/kubeflow/tf-operator/pkg/apis \ - tensorflow:v1 \ - --output-base "${TEMP_DIR}" \ - --go-header-file hack/boilerplate/boilerplate.go.txt + github.com/kubeflow/training-operator/pkg/client github.com/kubeflow/training-operator/pkg/apis \ + tensorflow:v1 \ + --output-base "${TEMP_DIR}" \ + --go-header-file hack/boilerplate/boilerplate.go.txt # Notice: The code in code-generator does not generate defaulter by default. # We need to build binary from vendor cmd folder. 
@@ -86,30 +85,30 @@ ${CODEGEN_PKG}/generate-groups.sh "all" \ # ${GOPATH}/bin/defaulter-gen is automatically built from ${CODEGEN_PKG}/generate-groups.sh echo "Generating defaulters for tensorflow/v1" -${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 \ - -O zz_generated.defaults \ - --output-package github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 \ - --go-header-file hack/boilerplate/boilerplate.go.txt "$@" +${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1 \ + -O zz_generated.defaults \ + --output-package github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1 \ + --go-header-file hack/boilerplate/boilerplate.go.txt "$@" echo "Generating defaulters for pytorch/v1" -${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1 \ - -O zz_generated.defaults \ - --output-package github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1 \ - --go-header-file hack/boilerplate/boilerplate.go.txt "$@" +${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/training-operator/pkg/apis/pytorch/v1 \ + -O zz_generated.defaults \ + --output-package github.com/kubeflow/training-operator/pkg/apis/pytorch/v1 \ + --go-header-file hack/boilerplate/boilerplate.go.txt "$@" echo "Generating defaulters for mxnet/v1" -${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1 \ - -O zz_generated.defaults \ - --output-package github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1 \ - --go-header-file hack/boilerplate/boilerplate.go.txt "$@" +${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/training-operator/pkg/apis/mxnet/v1 \ + -O zz_generated.defaults \ + --output-package github.com/kubeflow/training-operator/pkg/apis/mxnet/v1 \ + --go-header-file hack/boilerplate/boilerplate.go.txt "$@" echo "Generating defaulters for xgboost/v1" -${GOPATH}/bin/defaulter-gen --input-dirs 
github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1 \ - -O zz_generated.defaults \ - --output-package github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1 \ - --go-header-file hack/boilerplate/boilerplate.go.txt "$@" +${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/training-operator/pkg/apis/xgboost/v1 \ + -O zz_generated.defaults \ + --output-package github.com/kubeflow/training-operator/pkg/apis/xgboost/v1 \ + --go-header-file hack/boilerplate/boilerplate.go.txt "$@" -cd - > /dev/null +cd - >/dev/null # Notice: The code in kube-openapi does not generate defaulter by default. # We need to build binary from pkg cmd folder. @@ -117,30 +116,30 @@ echo "Building openapi-gen" go build -o openapi-gen ${OPENAPI_PKG}/cmd/openapi-gen echo "Generating OpenAPI specification for tensorflow/v1" -./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1,github.com/kubeflow/common/pkg/apis/common/v1 \ - --report-filename=hack/violation_exception.list \ - --output-package github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 \ - --go-header-file hack/boilerplate/boilerplate.go.txt "$@" +./openapi-gen --input-dirs github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1,github.com/kubeflow/common/pkg/apis/common/v1 \ + --report-filename=hack/violation_exception.list \ + --output-package github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1 \ + --go-header-file hack/boilerplate/boilerplate.go.txt "$@" echo "Generating OpenAPI specification for pytorch/v1" -./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1,github.com/kubeflow/common/pkg/apis/common/v1 \ - --report-filename=hack/violation_exception.list \ - --output-package github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1 \ - --go-header-file hack/boilerplate/boilerplate.go.txt "$@" +./openapi-gen --input-dirs github.com/kubeflow/training-operator/pkg/apis/pytorch/v1,github.com/kubeflow/common/pkg/apis/common/v1 \ + 
--report-filename=hack/violation_exception.list \ + --output-package github.com/kubeflow/training-operator/pkg/apis/pytorch/v1 \ + --go-header-file hack/boilerplate/boilerplate.go.txt "$@" echo "Generating OpenAPI specification for mxnet/v1" -./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1,github.com/kubeflow/common/pkg/apis/common/v1 \ - --report-filename=hack/violation_exception.list \ - --output-package github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1 \ - --go-header-file hack/boilerplate/boilerplate.go.txt "$@" +./openapi-gen --input-dirs github.com/kubeflow/training-operator/pkg/apis/mxnet/v1,github.com/kubeflow/common/pkg/apis/common/v1 \ + --report-filename=hack/violation_exception.list \ + --output-package github.com/kubeflow/training-operator/pkg/apis/mxnet/v1 \ + --go-header-file hack/boilerplate/boilerplate.go.txt "$@" echo "Generating OpenAPI specification for xgboost/v1" -./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1,github.com/kubeflow/common/pkg/apis/common/v1 \ - --report-filename=hack/violation_exception.list \ - --output-package github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1 \ - --go-header-file hack/boilerplate/boilerplate.go.txt "$@" +./openapi-gen --input-dirs github.com/kubeflow/training-operator/pkg/apis/xgboost/v1,github.com/kubeflow/common/pkg/apis/common/v1 \ + --report-filename=hack/violation_exception.list \ + --output-package github.com/kubeflow/training-operator/pkg/apis/xgboost/v1 \ + --go-header-file hack/boilerplate/boilerplate.go.txt "$@" -cd - > /dev/null +cd - >/dev/null # Copy everything back. cp -a "${TEMP_DIR}/${ROOT_PKG}/." 
"${SCRIPT_ROOT}/" diff --git a/pkg/apis/mxnet/v1/openapi_generated.go b/pkg/apis/mxnet/v1/openapi_generated.go index 46397b7975..ac2438b62e 100644 --- a/pkg/apis/mxnet/v1/openapi_generated.go +++ b/pkg/apis/mxnet/v1/openapi_generated.go @@ -28,16 +28,16 @@ import ( func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition { return map[string]common.OpenAPIDefinition{ - "github.com/kubeflow/common/pkg/apis/common/v1.JobCondition": schema_pkg_apis_common_v1_JobCondition(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus": schema_pkg_apis_common_v1_JobStatus(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaSpec": schema_pkg_apis_common_v1_ReplicaSpec(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaStatus": schema_pkg_apis_common_v1_ReplicaStatus(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.RunPolicy": schema_pkg_apis_common_v1_RunPolicy(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.SchedulingPolicy": schema_pkg_apis_common_v1_SchedulingPolicy(ref), - "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1.MXJob": schema_pkg_apis_mxnet_v1_MXJob(ref), - "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1.MXJobList": schema_pkg_apis_mxnet_v1_MXJobList(ref), - "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1.MXJobSpec": schema_pkg_apis_mxnet_v1_MXJobSpec(ref), - "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1.MXJobStatus": schema_pkg_apis_mxnet_v1_MXJobStatus(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.JobCondition": schema_pkg_apis_common_v1_JobCondition(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus": schema_pkg_apis_common_v1_JobStatus(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaSpec": schema_pkg_apis_common_v1_ReplicaSpec(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaStatus": schema_pkg_apis_common_v1_ReplicaStatus(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.RunPolicy": 
schema_pkg_apis_common_v1_RunPolicy(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.SchedulingPolicy": schema_pkg_apis_common_v1_SchedulingPolicy(ref), + "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1.MXJob": schema_pkg_apis_mxnet_v1_MXJob(ref), + "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1.MXJobList": schema_pkg_apis_mxnet_v1_MXJobList(ref), + "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1.MXJobSpec": schema_pkg_apis_mxnet_v1_MXJobSpec(ref), + "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1.MXJobStatus": schema_pkg_apis_mxnet_v1_MXJobStatus(ref), } } @@ -350,7 +350,7 @@ func schema_pkg_apis_mxnet_v1_MXJob(ref common.ReferenceCallback) common.OpenAPI }, "spec": { SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1.MXJobSpec"), + Ref: ref("github.com/kubeflow/training-operator/pkg/apis/mxnet/v1.MXJobSpec"), }, }, "status": { @@ -362,7 +362,7 @@ func schema_pkg_apis_mxnet_v1_MXJob(ref common.ReferenceCallback) common.OpenAPI }, }, Dependencies: []string{ - "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus", "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1.MXJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, + "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus", "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1.MXJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, } } @@ -398,7 +398,7 @@ func schema_pkg_apis_mxnet_v1_MXJobList(ref common.ReferenceCallback) common.Ope Items: &spec.SchemaOrArray{ Schema: &spec.Schema{ SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1.MXJob"), + Ref: ref("github.com/kubeflow/training-operator/pkg/apis/mxnet/v1.MXJob"), }, }, }, @@ -409,7 +409,7 @@ func schema_pkg_apis_mxnet_v1_MXJobList(ref common.ReferenceCallback) common.Ope }, }, Dependencies: []string{ - "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1.MXJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, 
+ "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1.MXJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, } } diff --git a/pkg/apis/mxnet/validation/validation.go b/pkg/apis/mxnet/validation/validation.go index f2958bdd1c..d408ad7e74 100644 --- a/pkg/apis/mxnet/validation/validation.go +++ b/pkg/apis/mxnet/validation/validation.go @@ -21,7 +21,7 @@ import ( log "github.com/sirupsen/logrus" - mxv1 "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1" + mxv1 "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1" ) // ValidateV1MXJobSpec checks that the v1.MXJobSpec is valid. diff --git a/pkg/apis/mxnet/validation/validation_test.go b/pkg/apis/mxnet/validation/validation_test.go index 2086884598..b642212ea4 100644 --- a/pkg/apis/mxnet/validation/validation_test.go +++ b/pkg/apis/mxnet/validation/validation_test.go @@ -18,7 +18,7 @@ import ( "testing" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - mxv1 "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1" + mxv1 "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1" v1 "k8s.io/api/core/v1" ) diff --git a/pkg/apis/pytorch/v1/openapi_generated.go b/pkg/apis/pytorch/v1/openapi_generated.go index db09bbb241..eaf4aaac84 100644 --- a/pkg/apis/pytorch/v1/openapi_generated.go +++ b/pkg/apis/pytorch/v1/openapi_generated.go @@ -28,15 +28,15 @@ import ( func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition { return map[string]common.OpenAPIDefinition{ - "github.com/kubeflow/common/pkg/apis/common/v1.JobCondition": schema_pkg_apis_common_v1_JobCondition(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus": schema_pkg_apis_common_v1_JobStatus(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaSpec": schema_pkg_apis_common_v1_ReplicaSpec(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaStatus": schema_pkg_apis_common_v1_ReplicaStatus(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.RunPolicy": 
schema_pkg_apis_common_v1_RunPolicy(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.SchedulingPolicy": schema_pkg_apis_common_v1_SchedulingPolicy(ref), - "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1.PyTorchJob": schema_pkg_apis_pytorch_v1_PyTorchJob(ref), - "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1.PyTorchJobList": schema_pkg_apis_pytorch_v1_PyTorchJobList(ref), - "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1.PyTorchJobSpec": schema_pkg_apis_pytorch_v1_PyTorchJobSpec(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.JobCondition": schema_pkg_apis_common_v1_JobCondition(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus": schema_pkg_apis_common_v1_JobStatus(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaSpec": schema_pkg_apis_common_v1_ReplicaSpec(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaStatus": schema_pkg_apis_common_v1_ReplicaStatus(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.RunPolicy": schema_pkg_apis_common_v1_RunPolicy(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.SchedulingPolicy": schema_pkg_apis_common_v1_SchedulingPolicy(ref), + "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1.PyTorchJob": schema_pkg_apis_pytorch_v1_PyTorchJob(ref), + "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1.PyTorchJobList": schema_pkg_apis_pytorch_v1_PyTorchJobList(ref), + "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1.PyTorchJobSpec": schema_pkg_apis_pytorch_v1_PyTorchJobSpec(ref), } } @@ -350,7 +350,7 @@ func schema_pkg_apis_pytorch_v1_PyTorchJob(ref common.ReferenceCallback) common. 
"spec": { SchemaProps: spec.SchemaProps{ Description: "Specification of the desired state of the PyTorchJob.", - Ref: ref("github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1.PyTorchJobSpec"), + Ref: ref("github.com/kubeflow/training-operator/pkg/apis/pytorch/v1.PyTorchJobSpec"), }, }, "status": { @@ -363,7 +363,7 @@ func schema_pkg_apis_pytorch_v1_PyTorchJob(ref common.ReferenceCallback) common. }, }, Dependencies: []string{ - "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus", "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1.PyTorchJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, + "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus", "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1.PyTorchJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, } } @@ -401,7 +401,7 @@ func schema_pkg_apis_pytorch_v1_PyTorchJobList(ref common.ReferenceCallback) com Items: &spec.SchemaOrArray{ Schema: &spec.Schema{ SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1.PyTorchJob"), + Ref: ref("github.com/kubeflow/training-operator/pkg/apis/pytorch/v1.PyTorchJob"), }, }, }, @@ -412,7 +412,7 @@ func schema_pkg_apis_pytorch_v1_PyTorchJobList(ref common.ReferenceCallback) com }, }, Dependencies: []string{ - "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1.PyTorchJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, + "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1.PyTorchJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, } } diff --git a/pkg/apis/pytorch/validation/validation.go b/pkg/apis/pytorch/validation/validation.go index 64d30b5938..e41cb80102 100644 --- a/pkg/apis/pytorch/validation/validation.go +++ b/pkg/apis/pytorch/validation/validation.go @@ -19,7 +19,7 @@ import ( commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - torchv1 "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1" + torchv1 "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1" ) func 
ValidateV1PyTorchJobSpec(c *torchv1.PyTorchJobSpec) error { diff --git a/pkg/apis/pytorch/validation/validation_test.go b/pkg/apis/pytorch/validation/validation_test.go index 94756bf021..d2b3459f05 100644 --- a/pkg/apis/pytorch/validation/validation_test.go +++ b/pkg/apis/pytorch/validation/validation_test.go @@ -18,7 +18,7 @@ import ( "testing" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - torchv1 "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1" + torchv1 "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1" v1 "k8s.io/api/core/v1" ) diff --git a/pkg/apis/tensorflow/v1/openapi_generated.go b/pkg/apis/tensorflow/v1/openapi_generated.go index 269f0374ad..2775cafa52 100644 --- a/pkg/apis/tensorflow/v1/openapi_generated.go +++ b/pkg/apis/tensorflow/v1/openapi_generated.go @@ -28,15 +28,15 @@ import ( func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition { return map[string]common.OpenAPIDefinition{ - "github.com/kubeflow/common/pkg/apis/common/v1.JobCondition": schema_pkg_apis_common_v1_JobCondition(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus": schema_pkg_apis_common_v1_JobStatus(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaSpec": schema_pkg_apis_common_v1_ReplicaSpec(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaStatus": schema_pkg_apis_common_v1_ReplicaStatus(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.RunPolicy": schema_pkg_apis_common_v1_RunPolicy(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.SchedulingPolicy": schema_pkg_apis_common_v1_SchedulingPolicy(ref), - "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1.TFJob": schema_pkg_apis_tensorflow_v1_TFJob(ref), - "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1.TFJobList": schema_pkg_apis_tensorflow_v1_TFJobList(ref), - "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1.TFJobSpec": schema_pkg_apis_tensorflow_v1_TFJobSpec(ref), + 
"github.com/kubeflow/common/pkg/apis/common/v1.JobCondition": schema_pkg_apis_common_v1_JobCondition(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus": schema_pkg_apis_common_v1_JobStatus(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaSpec": schema_pkg_apis_common_v1_ReplicaSpec(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaStatus": schema_pkg_apis_common_v1_ReplicaStatus(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.RunPolicy": schema_pkg_apis_common_v1_RunPolicy(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.SchedulingPolicy": schema_pkg_apis_common_v1_SchedulingPolicy(ref), + "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1.TFJob": schema_pkg_apis_tensorflow_v1_TFJob(ref), + "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1.TFJobList": schema_pkg_apis_tensorflow_v1_TFJobList(ref), + "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1.TFJobSpec": schema_pkg_apis_tensorflow_v1_TFJobSpec(ref), } } @@ -350,7 +350,7 @@ func schema_pkg_apis_tensorflow_v1_TFJob(ref common.ReferenceCallback) common.Op "spec": { SchemaProps: spec.SchemaProps{ Description: "Specification of the desired state of the TFJob.", - Ref: ref("github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1.TFJobSpec"), + Ref: ref("github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1.TFJobSpec"), }, }, "status": { @@ -363,7 +363,7 @@ func schema_pkg_apis_tensorflow_v1_TFJob(ref common.ReferenceCallback) common.Op }, }, Dependencies: []string{ - "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus", "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1.TFJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, + "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus", "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1.TFJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, } } @@ -401,7 +401,7 @@ func schema_pkg_apis_tensorflow_v1_TFJobList(ref common.ReferenceCallback) 
commo Items: &spec.SchemaOrArray{ Schema: &spec.Schema{ SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1.TFJob"), + Ref: ref("github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1.TFJob"), }, }, }, @@ -412,7 +412,7 @@ func schema_pkg_apis_tensorflow_v1_TFJobList(ref common.ReferenceCallback) commo }, }, Dependencies: []string{ - "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1.TFJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, + "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1.TFJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, } } diff --git a/pkg/apis/tensorflow/validation/validation.go b/pkg/apis/tensorflow/validation/validation.go index 256d25b735..d9b7cb92d2 100644 --- a/pkg/apis/tensorflow/validation/validation.go +++ b/pkg/apis/tensorflow/validation/validation.go @@ -20,7 +20,7 @@ import ( log "github.com/sirupsen/logrus" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + tfv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" ) // ValidateV1TFJobSpec checks that the v1.TFJobSpec is valid. 
diff --git a/pkg/apis/tensorflow/validation/validation_test.go b/pkg/apis/tensorflow/validation/validation_test.go index 0384316679..ee316cebaa 100644 --- a/pkg/apis/tensorflow/validation/validation_test.go +++ b/pkg/apis/tensorflow/validation/validation_test.go @@ -18,7 +18,7 @@ import ( "testing" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + tfv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" v1 "k8s.io/api/core/v1" ) diff --git a/pkg/apis/xgboost/v1/openapi_generated.go b/pkg/apis/xgboost/v1/openapi_generated.go index 25f05a3f5e..56df36d5e1 100644 --- a/pkg/apis/xgboost/v1/openapi_generated.go +++ b/pkg/apis/xgboost/v1/openapi_generated.go @@ -28,15 +28,15 @@ import ( func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition { return map[string]common.OpenAPIDefinition{ - "github.com/kubeflow/common/pkg/apis/common/v1.JobCondition": schema_pkg_apis_common_v1_JobCondition(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus": schema_pkg_apis_common_v1_JobStatus(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaSpec": schema_pkg_apis_common_v1_ReplicaSpec(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaStatus": schema_pkg_apis_common_v1_ReplicaStatus(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.RunPolicy": schema_pkg_apis_common_v1_RunPolicy(ref), - "github.com/kubeflow/common/pkg/apis/common/v1.SchedulingPolicy": schema_pkg_apis_common_v1_SchedulingPolicy(ref), - "github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1.XGBoostJob": schema_pkg_apis_xgboost_v1_XGBoostJob(ref), - "github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1.XGBoostJobList": schema_pkg_apis_xgboost_v1_XGBoostJobList(ref), - "github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1.XGBoostJobSpec": schema_pkg_apis_xgboost_v1_XGBoostJobSpec(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.JobCondition": 
schema_pkg_apis_common_v1_JobCondition(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus": schema_pkg_apis_common_v1_JobStatus(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaSpec": schema_pkg_apis_common_v1_ReplicaSpec(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.ReplicaStatus": schema_pkg_apis_common_v1_ReplicaStatus(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.RunPolicy": schema_pkg_apis_common_v1_RunPolicy(ref), + "github.com/kubeflow/common/pkg/apis/common/v1.SchedulingPolicy": schema_pkg_apis_common_v1_SchedulingPolicy(ref), + "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1.XGBoostJob": schema_pkg_apis_xgboost_v1_XGBoostJob(ref), + "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1.XGBoostJobList": schema_pkg_apis_xgboost_v1_XGBoostJobList(ref), + "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1.XGBoostJobSpec": schema_pkg_apis_xgboost_v1_XGBoostJobSpec(ref), } } @@ -349,7 +349,7 @@ func schema_pkg_apis_xgboost_v1_XGBoostJob(ref common.ReferenceCallback) common. }, "spec": { SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1.XGBoostJobSpec"), + Ref: ref("github.com/kubeflow/training-operator/pkg/apis/xgboost/v1.XGBoostJobSpec"), }, }, "status": { @@ -361,7 +361,7 @@ func schema_pkg_apis_xgboost_v1_XGBoostJob(ref common.ReferenceCallback) common. 
}, }, Dependencies: []string{ - "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus", "github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1.XGBoostJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, + "github.com/kubeflow/common/pkg/apis/common/v1.JobStatus", "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1.XGBoostJobSpec", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, } } @@ -397,7 +397,7 @@ func schema_pkg_apis_xgboost_v1_XGBoostJobList(ref common.ReferenceCallback) com Items: &spec.SchemaOrArray{ Schema: &spec.Schema{ SchemaProps: spec.SchemaProps{ - Ref: ref("github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1.XGBoostJob"), + Ref: ref("github.com/kubeflow/training-operator/pkg/apis/xgboost/v1.XGBoostJob"), }, }, }, @@ -408,7 +408,7 @@ func schema_pkg_apis_xgboost_v1_XGBoostJobList(ref common.ReferenceCallback) com }, }, Dependencies: []string{ - "github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1.XGBoostJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, + "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1.XGBoostJob", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, } } diff --git a/pkg/apis/xgboost/validation/validation.go b/pkg/apis/xgboost/validation/validation.go index 0d0175ef91..432d98013a 100644 --- a/pkg/apis/xgboost/validation/validation.go +++ b/pkg/apis/xgboost/validation/validation.go @@ -17,7 +17,7 @@ package validation import ( "fmt" - xgboostv1 "github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1" + xgboostv1 "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" ) diff --git a/pkg/apis/xgboost/validation/validation_test.go b/pkg/apis/xgboost/validation/validation_test.go index 2173629b64..3d6a56d9b3 100644 --- a/pkg/apis/xgboost/validation/validation_test.go +++ b/pkg/apis/xgboost/validation/validation_test.go @@ -18,7 +18,7 @@ import ( "testing" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - xgboostv1 
"github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1" + xgboostv1 "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1" v1 "k8s.io/api/core/v1" ) diff --git a/pkg/client/clientset/versioned/clientset.go b/pkg/client/clientset/versioned/clientset.go index 616be6007a..3c6e20e84a 100644 --- a/pkg/client/clientset/versioned/clientset.go +++ b/pkg/client/clientset/versioned/clientset.go @@ -19,7 +19,7 @@ package versioned import ( "fmt" - kubeflowv1 "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned/typed/tensorflow/v1" + kubeflowv1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/tensorflow/v1" discovery "k8s.io/client-go/discovery" rest "k8s.io/client-go/rest" flowcontrol "k8s.io/client-go/util/flowcontrol" diff --git a/pkg/client/clientset/versioned/fake/clientset_generated.go b/pkg/client/clientset/versioned/fake/clientset_generated.go index 19e1f6e787..e304d1cbf7 100644 --- a/pkg/client/clientset/versioned/fake/clientset_generated.go +++ b/pkg/client/clientset/versioned/fake/clientset_generated.go @@ -17,9 +17,9 @@ package fake import ( - clientset "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned" - kubeflowv1 "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned/typed/tensorflow/v1" - fakekubeflowv1 "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned/typed/tensorflow/v1/fake" + clientset "github.com/kubeflow/training-operator/pkg/client/clientset/versioned" + kubeflowv1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/tensorflow/v1" + fakekubeflowv1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/tensorflow/v1/fake" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/watch" "k8s.io/client-go/discovery" diff --git a/pkg/client/clientset/versioned/fake/register.go b/pkg/client/clientset/versioned/fake/register.go index 246d0c88ff..36d9bbc2b4 100644 --- a/pkg/client/clientset/versioned/fake/register.go +++ 
b/pkg/client/clientset/versioned/fake/register.go @@ -17,7 +17,7 @@ package fake import ( - kubeflowv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" schema "k8s.io/apimachinery/pkg/runtime/schema" diff --git a/pkg/client/clientset/versioned/scheme/register.go b/pkg/client/clientset/versioned/scheme/register.go index 5290e5a10b..c0b97b6a53 100644 --- a/pkg/client/clientset/versioned/scheme/register.go +++ b/pkg/client/clientset/versioned/scheme/register.go @@ -17,7 +17,7 @@ package scheme import ( - kubeflowv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" schema "k8s.io/apimachinery/pkg/runtime/schema" diff --git a/pkg/client/clientset/versioned/typed/tensorflow/v1/fake/fake_tensorflow_client.go b/pkg/client/clientset/versioned/typed/tensorflow/v1/fake/fake_tensorflow_client.go index 0f9e0c0c33..b162682d33 100644 --- a/pkg/client/clientset/versioned/typed/tensorflow/v1/fake/fake_tensorflow_client.go +++ b/pkg/client/clientset/versioned/typed/tensorflow/v1/fake/fake_tensorflow_client.go @@ -17,7 +17,7 @@ package fake import ( - v1 "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned/typed/tensorflow/v1" + v1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/tensorflow/v1" rest "k8s.io/client-go/rest" testing "k8s.io/client-go/testing" ) diff --git a/pkg/client/clientset/versioned/typed/tensorflow/v1/fake/fake_tfjob.go b/pkg/client/clientset/versioned/typed/tensorflow/v1/fake/fake_tfjob.go index 6b3a4f0f37..3ce015f043 100644 --- a/pkg/client/clientset/versioned/typed/tensorflow/v1/fake/fake_tfjob.go +++ b/pkg/client/clientset/versioned/typed/tensorflow/v1/fake/fake_tfjob.go @@ -19,7 +19,7 @@ 
package fake import ( "context" - tensorflowv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + tensorflowv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" labels "k8s.io/apimachinery/pkg/labels" schema "k8s.io/apimachinery/pkg/runtime/schema" diff --git a/pkg/client/clientset/versioned/typed/tensorflow/v1/tensorflow_client.go b/pkg/client/clientset/versioned/typed/tensorflow/v1/tensorflow_client.go index 813aa3766a..a49ff8af7a 100644 --- a/pkg/client/clientset/versioned/typed/tensorflow/v1/tensorflow_client.go +++ b/pkg/client/clientset/versioned/typed/tensorflow/v1/tensorflow_client.go @@ -17,8 +17,8 @@ package v1 import ( - v1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" - "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned/scheme" + v1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" + "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/scheme" rest "k8s.io/client-go/rest" ) diff --git a/pkg/client/clientset/versioned/typed/tensorflow/v1/tfjob.go b/pkg/client/clientset/versioned/typed/tensorflow/v1/tfjob.go index 3f20a94adc..1d498eed3e 100644 --- a/pkg/client/clientset/versioned/typed/tensorflow/v1/tfjob.go +++ b/pkg/client/clientset/versioned/typed/tensorflow/v1/tfjob.go @@ -20,8 +20,8 @@ import ( "context" "time" - v1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" - scheme "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned/scheme" + v1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" + scheme "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/scheme" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" types "k8s.io/apimachinery/pkg/types" watch "k8s.io/apimachinery/pkg/watch" diff --git a/pkg/client/informers/externalversions/factory.go b/pkg/client/informers/externalversions/factory.go index 838948ef1b..8d91be1dce 100644 --- a/pkg/client/informers/externalversions/factory.go +++ 
b/pkg/client/informers/externalversions/factory.go @@ -21,9 +21,9 @@ import ( sync "sync" time "time" - versioned "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned" - internalinterfaces "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions/internalinterfaces" - tensorflow "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions/tensorflow" + versioned "github.com/kubeflow/training-operator/pkg/client/clientset/versioned" + internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" + tensorflow "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/tensorflow" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" schema "k8s.io/apimachinery/pkg/runtime/schema" diff --git a/pkg/client/informers/externalversions/generic.go b/pkg/client/informers/externalversions/generic.go index 242459ff0e..365c83578c 100644 --- a/pkg/client/informers/externalversions/generic.go +++ b/pkg/client/informers/externalversions/generic.go @@ -19,7 +19,7 @@ package externalversions import ( "fmt" - v1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + v1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" schema "k8s.io/apimachinery/pkg/runtime/schema" cache "k8s.io/client-go/tools/cache" ) diff --git a/pkg/client/informers/externalversions/internalinterfaces/factory_interfaces.go b/pkg/client/informers/externalversions/internalinterfaces/factory_interfaces.go index 05a0ed58ca..05675010a3 100644 --- a/pkg/client/informers/externalversions/internalinterfaces/factory_interfaces.go +++ b/pkg/client/informers/externalversions/internalinterfaces/factory_interfaces.go @@ -19,7 +19,7 @@ package internalinterfaces import ( time "time" - versioned "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned" + versioned "github.com/kubeflow/training-operator/pkg/client/clientset/versioned" v1 
"k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" cache "k8s.io/client-go/tools/cache" diff --git a/pkg/client/informers/externalversions/tensorflow/interface.go b/pkg/client/informers/externalversions/tensorflow/interface.go index c6c7549586..1608b7325b 100644 --- a/pkg/client/informers/externalversions/tensorflow/interface.go +++ b/pkg/client/informers/externalversions/tensorflow/interface.go @@ -17,8 +17,8 @@ package tensorflow import ( - internalinterfaces "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions/internalinterfaces" - v1 "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions/tensorflow/v1" + internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" + v1 "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/tensorflow/v1" ) // Interface provides access to each of this group's versions. diff --git a/pkg/client/informers/externalversions/tensorflow/v1/interface.go b/pkg/client/informers/externalversions/tensorflow/v1/interface.go index 0e6c8fc3db..92906d0775 100644 --- a/pkg/client/informers/externalversions/tensorflow/v1/interface.go +++ b/pkg/client/informers/externalversions/tensorflow/v1/interface.go @@ -17,7 +17,7 @@ package v1 import ( - internalinterfaces "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions/internalinterfaces" + internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" ) // Interface provides access to all the informers in this group version. 
diff --git a/pkg/client/informers/externalversions/tensorflow/v1/tfjob.go b/pkg/client/informers/externalversions/tensorflow/v1/tfjob.go index 07bb87bd5c..a36197eb86 100644 --- a/pkg/client/informers/externalversions/tensorflow/v1/tfjob.go +++ b/pkg/client/informers/externalversions/tensorflow/v1/tfjob.go @@ -20,10 +20,10 @@ import ( "context" time "time" - tensorflowv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" - versioned "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned" - internalinterfaces "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions/internalinterfaces" - v1 "github.com/kubeflow/tf-operator/pkg/client/listers/tensorflow/v1" + tensorflowv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" + versioned "github.com/kubeflow/training-operator/pkg/client/clientset/versioned" + internalinterfaces "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/internalinterfaces" + v1 "github.com/kubeflow/training-operator/pkg/client/listers/tensorflow/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" watch "k8s.io/apimachinery/pkg/watch" diff --git a/pkg/client/listers/tensorflow/v1/tfjob.go b/pkg/client/listers/tensorflow/v1/tfjob.go index 6a8985a729..b477697309 100644 --- a/pkg/client/listers/tensorflow/v1/tfjob.go +++ b/pkg/client/listers/tensorflow/v1/tfjob.go @@ -17,7 +17,7 @@ package v1 import ( - v1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + v1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/tools/cache" diff --git a/pkg/common/util/v1/testutil/const.go b/pkg/common/util/v1/testutil/const.go index 55474ce80a..1260bb4710 100644 --- a/pkg/common/util/v1/testutil/const.go +++ b/pkg/common/util/v1/testutil/const.go @@ -19,7 +19,7 @@ import ( ) const ( - TestImageName = "test-image-for-kubeflow-tf-operator:latest" + 
TestImageName = "test-image-for-kubeflow-training-operator:latest" TestTFJobName = "test-tfjob" LabelWorker = "worker" LabelPS = "ps" diff --git a/pkg/common/util/v1/testutil/pod.go b/pkg/common/util/v1/testutil/pod.go index 16fb3f3e36..adce63fa32 100644 --- a/pkg/common/util/v1/testutil/pod.go +++ b/pkg/common/util/v1/testutil/pod.go @@ -22,7 +22,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/cache" - tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + tfv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" ) const ( diff --git a/pkg/common/util/v1/testutil/service.go b/pkg/common/util/v1/testutil/service.go index 5a64083819..2bf6448f5f 100644 --- a/pkg/common/util/v1/testutil/service.go +++ b/pkg/common/util/v1/testutil/service.go @@ -22,7 +22,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/cache" - tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + tfv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" ) func NewBaseService(name string, tfJob *tfv1.TFJob, t *testing.T) *v1.Service { diff --git a/pkg/common/util/v1/testutil/tfjob.go b/pkg/common/util/v1/testutil/tfjob.go index 5cd7e77eb6..89d0430c12 100644 --- a/pkg/common/util/v1/testutil/tfjob.go +++ b/pkg/common/util/v1/testutil/tfjob.go @@ -21,7 +21,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + tfv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" ) func NewTFJobWithCleanPolicy(chief, worker, ps int, policy commonv1.CleanPodPolicy) *tfv1.TFJob { diff --git a/pkg/common/util/v1/testutil/util.go b/pkg/common/util/v1/testutil/util.go index 07507e8131..5337ad04f2 100644 --- a/pkg/common/util/v1/testutil/util.go +++ b/pkg/common/util/v1/testutil/util.go @@ -19,7 +19,7 @@ import ( "testing" common "github.com/kubeflow/common/pkg/apis/common/v1" - 
tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + tfv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -40,7 +40,7 @@ var ( // key function but it should be just fine for non delete events. KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc GroupName = tfv1.GroupVersion.Group - ControllerName = "tf-operator" + ControllerName = "training-operator" ) func GenLabels(jobName string) map[string]string { diff --git a/pkg/common/util/v1/unstructured/informer.go b/pkg/common/util/v1/unstructured/informer.go index ba8edaf556..6c976a1e33 100644 --- a/pkg/common/util/v1/unstructured/informer.go +++ b/pkg/common/util/v1/unstructured/informer.go @@ -14,7 +14,7 @@ // Package unstructured is the package for unstructured informer, // which is from https://github.com/argoproj/argo/blob/master/util/unstructured/unstructured.go -// This is a temporary solution for https://github.com/kubeflow/tf-operator/issues/561 +// This is a temporary solution for https://github.com/kubeflow/training-operator/issues/561 package unstructured import ( @@ -29,8 +29,8 @@ import ( "k8s.io/client-go/dynamic" "k8s.io/client-go/tools/cache" - informer "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions/tensorflow/v1" - lister "github.com/kubeflow/tf-operator/pkg/client/listers/tensorflow/v1" + informer "github.com/kubeflow/training-operator/pkg/client/informers/externalversions/tensorflow/v1" + lister "github.com/kubeflow/training-operator/pkg/client/listers/tensorflow/v1" ) type UnstructuredInformer struct { diff --git a/pkg/controller.v1/mxnet/mxjob_controller.go b/pkg/controller.v1/mxnet/mxjob_controller.go index 5c17ce3814..908190a397 100644 --- a/pkg/controller.v1/mxnet/mxjob_controller.go +++ b/pkg/controller.v1/mxnet/mxjob_controller.go @@ -22,7 +22,7 @@ import ( "k8s.io/client-go/informers" - 
"github.com/kubeflow/tf-operator/pkg/apis/mxnet/validation" + "github.com/kubeflow/training-operator/pkg/apis/mxnet/validation" "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/handler" @@ -34,9 +34,9 @@ import ( "github.com/kubeflow/common/pkg/controller.v1/control" "github.com/kubeflow/common/pkg/controller.v1/expectation" commonutil "github.com/kubeflow/common/pkg/util" - mxjobv1 "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1" - trainingoperatorcommon "github.com/kubeflow/tf-operator/pkg/common" - "github.com/kubeflow/tf-operator/pkg/common/util" + mxjobv1 "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1" + trainingoperatorcommon "github.com/kubeflow/training-operator/pkg/common" + "github.com/kubeflow/training-operator/pkg/common/util" "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" diff --git a/pkg/controller.v1/mxnet/mxnet.go b/pkg/controller.v1/mxnet/mxnet.go index 8348313d72..7e13d32c17 100644 --- a/pkg/controller.v1/mxnet/mxnet.go +++ b/pkg/controller.v1/mxnet/mxnet.go @@ -22,7 +22,7 @@ import ( commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" "github.com/kubeflow/common/pkg/controller.v1/common" - mxnetv1 "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1" + mxnetv1 "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1" corev1 "k8s.io/api/core/v1" ) diff --git a/pkg/controller.v1/mxnet/suite_test.go b/pkg/controller.v1/mxnet/suite_test.go index 9f2cb3bf57..7a8ef8dd89 100644 --- a/pkg/controller.v1/mxnet/suite_test.go +++ b/pkg/controller.v1/mxnet/suite_test.go @@ -18,7 +18,7 @@ import ( "path/filepath" "testing" - v1 "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1" + v1 "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1" . "github.com/onsi/ginkgo" . 
"github.com/onsi/gomega" diff --git a/pkg/controller.v1/pytorch/pytorch.go b/pkg/controller.v1/pytorch/pytorch.go index e40b3ef0ff..1b1c18ef03 100644 --- a/pkg/controller.v1/pytorch/pytorch.go +++ b/pkg/controller.v1/pytorch/pytorch.go @@ -20,7 +20,7 @@ import ( "strings" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - pytorchv1 "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1" + pytorchv1 "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1" corev1 "k8s.io/api/core/v1" ) diff --git a/pkg/controller.v1/pytorch/pytorchjob_controller.go b/pkg/controller.v1/pytorch/pytorchjob_controller.go index dc8fff40cf..7966ba24d4 100644 --- a/pkg/controller.v1/pytorch/pytorchjob_controller.go +++ b/pkg/controller.v1/pytorch/pytorchjob_controller.go @@ -19,7 +19,7 @@ import ( "fmt" "reflect" - "github.com/kubeflow/tf-operator/pkg/apis/pytorch/validation" + "github.com/kubeflow/training-operator/pkg/apis/pytorch/validation" "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/handler" @@ -27,8 +27,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" commonutil "github.com/kubeflow/common/pkg/util" - trainingoperatorcommon "github.com/kubeflow/tf-operator/pkg/common" - "github.com/kubeflow/tf-operator/pkg/common/util" + trainingoperatorcommon "github.com/kubeflow/training-operator/pkg/common" + "github.com/kubeflow/training-operator/pkg/common/util" "k8s.io/apimachinery/pkg/api/meta" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "sigs.k8s.io/controller-runtime/pkg/event" @@ -38,7 +38,7 @@ import ( "github.com/kubeflow/common/pkg/controller.v1/common" "github.com/kubeflow/common/pkg/controller.v1/control" "github.com/kubeflow/common/pkg/controller.v1/expectation" - pytorchv1 "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1" + pytorchv1 "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1" "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" diff --git 
a/pkg/controller.v1/pytorch/suite_test.go b/pkg/controller.v1/pytorch/suite_test.go index ab534b4c0f..fc2e4cf8b0 100644 --- a/pkg/controller.v1/pytorch/suite_test.go +++ b/pkg/controller.v1/pytorch/suite_test.go @@ -18,7 +18,7 @@ import ( "path/filepath" "testing" - v1 "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1" + v1 "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" diff --git a/pkg/controller.v1/register_controller.go b/pkg/controller.v1/register_controller.go index 23e66ffc43..4613bdb921 100644 --- a/pkg/controller.v1/register_controller.go +++ b/pkg/controller.v1/register_controller.go @@ -18,14 +18,14 @@ import ( "fmt" "strings" - mxnetv1 "github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1" - pytorchv1 "github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1" - tensorflowv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" - xgboostv1 "github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1" - mxnetcontroller "github.com/kubeflow/tf-operator/pkg/controller.v1/mxnet" - pytorchcontroller "github.com/kubeflow/tf-operator/pkg/controller.v1/pytorch" - tensorflowcontroller "github.com/kubeflow/tf-operator/pkg/controller.v1/tensorflow" - xgboostcontroller "github.com/kubeflow/tf-operator/pkg/controller.v1/xgboost" + mxnetv1 "github.com/kubeflow/training-operator/pkg/apis/mxnet/v1" + pytorchv1 "github.com/kubeflow/training-operator/pkg/apis/pytorch/v1" + tensorflowv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" + xgboostv1 "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1" + mxnetcontroller "github.com/kubeflow/training-operator/pkg/controller.v1/mxnet" + pytorchcontroller "github.com/kubeflow/training-operator/pkg/controller.v1/pytorch" + tensorflowcontroller "github.com/kubeflow/training-operator/pkg/controller.v1/tensorflow" + xgboostcontroller "github.com/kubeflow/training-operator/pkg/controller.v1/xgboost" "sigs.k8s.io/controller-runtime/pkg/manager" ) diff --git 
a/pkg/controller.v1/tensorflow/suite_test.go b/pkg/controller.v1/tensorflow/suite_test.go index b672e62aa8..640c6284c5 100644 --- a/pkg/controller.v1/tensorflow/suite_test.go +++ b/pkg/controller.v1/tensorflow/suite_test.go @@ -27,7 +27,7 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - tensorflowv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + tensorflowv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" //+kubebuilder:scaffold:imports ) diff --git a/pkg/controller.v1/tensorflow/tensorflow.go b/pkg/controller.v1/tensorflow/tensorflow.go index 3acf078e3e..1ed851f8ab 100644 --- a/pkg/controller.v1/tensorflow/tensorflow.go +++ b/pkg/controller.v1/tensorflow/tensorflow.go @@ -23,7 +23,7 @@ import ( "strings" "github.com/kubeflow/common/pkg/controller.v1/common" - tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + tfv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" ) const ( diff --git a/pkg/controller.v1/tensorflow/tfjob_controller.go b/pkg/controller.v1/tensorflow/tfjob_controller.go index 98b730b21d..fa3fce0c6e 100644 --- a/pkg/controller.v1/tensorflow/tfjob_controller.go +++ b/pkg/controller.v1/tensorflow/tfjob_controller.go @@ -30,11 +30,11 @@ import ( "github.com/kubeflow/common/pkg/controller.v1/expectation" commonutil "github.com/kubeflow/common/pkg/util" train_util "github.com/kubeflow/common/pkg/util/train" - tensorflowv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" - tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" - "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/validation" - trainingoperatorcommon "github.com/kubeflow/tf-operator/pkg/common" - "github.com/kubeflow/tf-operator/pkg/common/util" + tensorflowv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" + tfv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" + "github.com/kubeflow/training-operator/pkg/apis/tensorflow/validation" + 
trainingoperatorcommon "github.com/kubeflow/training-operator/pkg/common" + "github.com/kubeflow/training-operator/pkg/common/util" "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" diff --git a/pkg/controller.v1/tensorflow/util.go b/pkg/controller.v1/tensorflow/util.go index 93aa781544..3bf0f727fc 100644 --- a/pkg/controller.v1/tensorflow/util.go +++ b/pkg/controller.v1/tensorflow/util.go @@ -16,7 +16,7 @@ package tensorflow import ( commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - tfv1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1" + tfv1 "github.com/kubeflow/training-operator/pkg/apis/tensorflow/v1" corev1 "k8s.io/api/core/v1" ) @@ -69,7 +69,7 @@ func setRestartPolicy(podTemplateSpec *corev1.PodTemplateSpec, spec *commonv1.Re } // isDistributed returns if the TFJob is a distributed training job. -// Ref https://github.com/kubeflow/tf-operator/issues/1078. +// Ref https://github.com/kubeflow/training-operator/issues/1078. // originally from pkg/controller.v1/tensorflow/pod.go (deleted) func isDistributed(tfjob *tfv1.TFJob) bool { replicas := tfjob.Spec.TFReplicaSpecs diff --git a/pkg/controller.v1/xgboost/suite_test.go b/pkg/controller.v1/xgboost/suite_test.go index bfd9b183f7..28d71e99fd 100644 --- a/pkg/controller.v1/xgboost/suite_test.go +++ b/pkg/controller.v1/xgboost/suite_test.go @@ -27,7 +27,7 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - xgboostv1 "github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1" + xgboostv1 "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1" //+kubebuilder:scaffold:imports ) diff --git a/pkg/controller.v1/xgboost/xgboost.go b/pkg/controller.v1/xgboost/xgboost.go index 36d2bddf3d..cf01f6c7bd 100644 --- a/pkg/controller.v1/xgboost/xgboost.go +++ b/pkg/controller.v1/xgboost/xgboost.go @@ -20,7 +20,7 @@ import ( "strings" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - xgboostv1 
"github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1" + xgboostv1 "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) diff --git a/pkg/controller.v1/xgboost/xgboostjob_controller.go b/pkg/controller.v1/xgboost/xgboostjob_controller.go index 8acd45a705..480fdc488e 100644 --- a/pkg/controller.v1/xgboost/xgboostjob_controller.go +++ b/pkg/controller.v1/xgboost/xgboostjob_controller.go @@ -20,7 +20,7 @@ import ( "k8s.io/client-go/informers" - "github.com/kubeflow/tf-operator/pkg/apis/xgboost/validation" + "github.com/kubeflow/training-operator/pkg/apis/xgboost/validation" "sigs.k8s.io/controller-runtime/pkg/predicate" @@ -36,7 +36,7 @@ import ( "github.com/kubeflow/common/pkg/controller.v1/expectation" commonutil "github.com/kubeflow/common/pkg/util" logger "github.com/kubeflow/common/pkg/util" - "github.com/kubeflow/tf-operator/pkg/common/util" + "github.com/kubeflow/training-operator/pkg/common/util" "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -58,8 +58,8 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - xgboostv1 "github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1" - trainingoperatorcommon "github.com/kubeflow/tf-operator/pkg/common" + xgboostv1 "github.com/kubeflow/training-operator/pkg/apis/xgboost/v1" + trainingoperatorcommon "github.com/kubeflow/training-operator/pkg/common" ) const ( diff --git a/py/kubeflow/tf_operator/prow.py b/py/kubeflow/tf_operator/prow.py index 6266c9486c..b402cd190e 100644 --- a/py/kubeflow/tf_operator/prow.py +++ b/py/kubeflow/tf_operator/prow.py @@ -27,7 +27,7 @@ # Default repository organization and name. # This should match the values used in Go imports. 
GO_REPO_OWNER = "kubeflow" -GO_REPO_NAME = "tf-operator" +GO_REPO_NAME = "training-operator" GCS_REGEX = re.compile("gs://([^/]*)/(.*)") diff --git a/py/kubeflow/tf_operator/prow_test.py b/py/kubeflow/tf_operator/prow_test.py index d1dbc88437..069fb52e73 100644 --- a/py/kubeflow/tf_operator/prow_test.py +++ b/py/kubeflow/tf_operator/prow_test.py @@ -32,7 +32,7 @@ def testCreateStartedPeriodic(self, mock_time): # pylint: disable=no-self-use expected = { "timestamp": 1000, "repos": { - "kubeflow/tf-operator": "abcd", + "kubeflow/training-operator": "abcd", }, } blob.upload_from_string.assert_called_once_with(json.dumps(expected)) diff --git a/py/kubeflow/tf_operator/release.py b/py/kubeflow/tf_operator/release.py index 916349ebcf..b349fb29db 100755 --- a/py/kubeflow/tf_operator/release.py +++ b/py/kubeflow/tf_operator/release.py @@ -24,7 +24,7 @@ # on PROW. But we choose sensible defaults so that we can run locally without # setting defaults. REPO_ORG = os.getenv("REPO_OWNER", "kubeflow") -REPO_NAME = os.getenv("REPO_NAME", "tf-operator") +REPO_NAME = os.getenv("REPO_NAME", "training-operator") RESULTS_BUCKET = "kubeflow-ci-results" JOB_NAME = "tf-k8s-postsubmit" @@ -145,16 +145,16 @@ def build_operator_image(root_dir, commit = build_and_push_image.GetGitHash(root_dir) targets = [ - "github.com/kubeflow/tf-operator/cmd/training-operator.v1", + "github.com/kubeflow/training-operator/cmd/training-operator.v1", ] for t in targets: if t in [ - "github.com/kubeflow/tf-operator/cmd/training-operator.v1" + "github.com/kubeflow/training-operator/cmd/training-operator.v1" ]: util.run([ "go", "install", "-ldflags", - '''-X github.com/kubeflow/tf-operator/pkg/version.GitSHA={} - -X github.com/kubeflow/tf-operator/pkg/version.Version={}'''.format( + '''-X github.com/kubeflow/training-operator/pkg/version.GitSHA={} + -X github.com/kubeflow/training-operator/pkg/version.Version={}'''.format( commit, version_tag), t ]) continue @@ -405,7 +405,7 @@ def clone_postsubmit(args): # 
TODO(jlewi): Delete this function once -# https://github.com/kubeflow/tf-operator/issues/189 is fixed. +# https://github.com/kubeflow/training-operator/issues/189 is fixed. def build_commit(args, branches): top_dir = args.src_dir or tempfile.mkdtemp(prefix="tmpTFJobSrc") logging.info("Top level directory for source: %s", top_dir) @@ -428,14 +428,14 @@ def build_commit(args, branches): # TODO(jlewi): Delete this function once -# https://github.com/kubeflow/tf-operator/issues/189 is fixed. +# https://github.com/kubeflow/training-operator/issues/189 is fixed. def build_postsubmit(args): """Build the artifacts from a postsubmit.""" build_commit(args, None) # TODO(jlewi): Delete this function once -# https://github.com/kubeflow/tf-operator/issues/189 is fixed. +# https://github.com/kubeflow/training-operator/issues/189 is fixed. def build_pr(args): """Build the artifacts from a postsubmit.""" branches = ["pull/{0}/head:pr".format(args.pr)] diff --git a/py/kubeflow/tf_operator/release_test.py b/py/kubeflow/tf_operator/release_test.py index 5e498e6184..3bbc42605e 100644 --- a/py/kubeflow/tf_operator/release_test.py +++ b/py/kubeflow/tf_operator/release_test.py @@ -17,17 +17,17 @@ def test_build_postsubmit( # pylint: disable=no-self-use _mock_makedirs): # Make sure REPO_OWNER and REPO_NAME aren't changed by the environment release.REPO_ORG = "kubeflow" - release.REPO_NAME = "tf-operator" + release.REPO_NAME = "training-operator" parser = release.build_parser() args = parser.parse_args(["postsubmit", "--src_dir=/top/src_dir"]) release.build_postsubmit(args) mock_build_and_push.assert_called_once_with( - '/top/src_dir/go', '/top/src_dir/go/src/github.com/kubeflow/tf-operator', + '/top/src_dir/go', '/top/src_dir/go/src/github.com/kubeflow/training-operator', mock.ANY) mock_clone.assert_called_once_with('/top/src_dir/git_tensorflow_k8s', - 'kubeflow', 'tf-operator', None, None) + 'kubeflow', 'training-operator', None, None) 
@mock.patch("kubeflow.tf_operator.release.os.makedirs") @mock.patch("kubeflow.tf_operator.release.os.symlink") @@ -43,10 +43,10 @@ def test_build_pr( # pylint: disable=no-self-use release.build_pr(args) mock_build_and_push.assert_called_once_with( - '/top/src_dir/go', '/top/src_dir/go/src/github.com/kubeflow/tf-operator', + '/top/src_dir/go', '/top/src_dir/go/src/github.com/kubeflow/training-operator', mock.ANY) mock_clone.assert_called_once_with("/top/src_dir/git_tensorflow_k8s", - "kubeflow", "tf-operator", "22", + "kubeflow", "training-operator", "22", ["pull/10/head:pr"]) def test_update_values(self): diff --git a/py/kubeflow/tf_operator/simple_tfjob_tests.py b/py/kubeflow/tf_operator/simple_tfjob_tests.py index a213e7de8c..a0c30cbcc6 100644 --- a/py/kubeflow/tf_operator/simple_tfjob_tests.py +++ b/py/kubeflow/tf_operator/simple_tfjob_tests.py @@ -67,7 +67,7 @@ def run_simple_tfjob(self, component): api_client, self.namespace, results) if creation_failures: # TODO(jlewi): Starting with - # https://github.com/kubeflow/tf-operator/pull/646 the number of events + # https://github.com/kubeflow/training-operator/pull/646 the number of events # no longer seems to match the expected; it looks like maybe events # are being combined? For now we just log a warning rather than an # error. diff --git a/py/kubeflow/tf_operator/util.py b/py/kubeflow/tf_operator/util.py index b643630d56..75d84f0f06 100755 --- a/py/kubeflow/tf_operator/util.py +++ b/py/kubeflow/tf_operator/util.py @@ -30,7 +30,7 @@ # We default to environment variables so that it can be set correctly when # running under prow. MASTER_REPO_OWNER = os.getenv("REPO_OWNER", "kubeflow") -MASTER_REPO_NAME = os.getenv("REPO_NAME", "tf-operator") +MASTER_REPO_NAME = os.getenv("REPO_NAME", "training-operator") # TODO(jlewi): Should we stream the output by polling the subprocess? 
diff --git a/scripts/setup-tf-operator.sh b/scripts/setup-tf-operator.sh index c5728c2ac8..95cbe19593 100755 --- a/scripts/setup-tf-operator.sh +++ b/scripts/setup-tf-operator.sh @@ -17,14 +17,13 @@ # This shell script is used to build a cluster and create a namespace from our # argo workflow - set -o errexit set -o nounset set -o pipefail CLUSTER_NAME="${CLUSTER_NAME}" REGION="${AWS_REGION:-us-west-2}" -REGISTRY="${ECR_REGISTRY:-public.ecr.aws/j1r0q0g6/training/tf-operator}" +REGISTRY="${ECR_REGISTRY:-public.ecr.aws/j1r0q0g6/training/training-operator}" VERSION="${PULL_BASE_SHA}" GO_DIR=${GOPATH}/src/github.com/${REPO_OWNER}/${REPO_NAME} @@ -33,7 +32,7 @@ aws eks update-kubeconfig --region=${REGION} --name=${CLUSTER_NAME} echo "Update tf operator manifest with new name $REGISTRY and tag $VERSION" cd manifests/overlays/standalone -kustomize edit set image public.ecr.aws/j1r0q0g6/training/tf-operator=${REGISTRY}:${VERSION} +kustomize edit set image public.ecr.aws/j1r0q0g6/training/training-operator=${REGISTRY}:${VERSION} echo "Installing tf operator manifests" kustomize build . | kubectl apply -f - @@ -41,7 +40,7 @@ kustomize build . 
| kubectl apply -f - TIMEOUT=30 until kubectl get pods -n kubeflow | grep tf-job-operator | grep 1/1 || [[ $TIMEOUT -eq 1 ]]; do sleep 10 - TIMEOUT=$(( TIMEOUT - 1 )) + TIMEOUT=$((TIMEOUT - 1)) done kubectl describe all -n kubeflow kubectl describe pods -n kubeflow diff --git a/sdk/python/.openapi-generator/VERSION b/sdk/python/.openapi-generator/VERSION new file mode 100644 index 0000000000..ecedc98d1d --- /dev/null +++ b/sdk/python/.openapi-generator/VERSION @@ -0,0 +1 @@ +4.3.1 \ No newline at end of file diff --git a/sdk/python/setup.py b/sdk/python/setup.py index 415b5eb9e5..bb38c85019 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -29,7 +29,7 @@ author="Kubeflow Authors", author_email='hejinchi@cn.ibm.com', license="Apache License Version 2.0", - url="https://github.com/kubeflow/tf-operator/sdk/python", + url="https://github.com/kubeflow/training-operator/sdk/python", description="Training Operator Python SDK", long_description="Training Operator Python SDK", packages=setuptools.find_packages( diff --git a/submit_release_job.sh b/submit_release_job.sh index 1a57f55f42..79c14958d1 100755 --- a/submit_release_job.sh +++ b/submit_release_job.sh @@ -10,23 +10,22 @@ set -ex COMMIT=$1 -ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -JOB_NAME="tf-operator-release" -JOB_TYPE=tf-operator-release +JOB_NAME="training-operator-release" +JOB_TYPE=training-operator-release BUILD_NUMBER=$(uuidgen) BUILD_NUMBER=${BUILD_NUMBER:0:4} REPO_OWNER=kubeflow -REPO_NAME=tf-operator +REPO_NAME=training-operator ENV=test -DATE=`date +%Y%m%d` +DATE=$(date +%Y%m%d) PULL_BASE_SHA=${COMMIT:0:8} VERSION_TAG="v${DATE}-${PULL_BASE_SHA}" - PROW_VAR="JOB_NAME=${JOB_NAME},JOB_TYPE=${JOB_TYPE},REPO_NAME=${REPO_NAME}" -PROW_VAR="${PROW_VAR},REPO_OWNER=${REPO_OWNER},BUILD_NUMBER=${BUILD_NUMBER}" -PROW_VAR="${PROW_VAR},PULL_BASE_SHA=${PULL_BASE_SHA}" 
+PROW_VAR="${PROW_VAR},REPO_OWNER=${REPO_OWNER},BUILD_NUMBER=${BUILD_NUMBER}" +PROW_VAR="${PROW_VAR},PULL_BASE_SHA=${PULL_BASE_SHA}" cd ${ROOT}/test/workflows diff --git a/test/test-app/components/params.libsonnet b/test/test-app/components/params.libsonnet index 431a6bc481..89fdac1fbe 100644 --- a/test/test-app/components/params.libsonnet +++ b/test/test-app/components/params.libsonnet @@ -21,7 +21,7 @@ tfAmbassadorImage: "quay.io/datawire/ambassador:0.30.1", tfAmbassadorServiceType: "ClusterIP", tfDefaultImage: "null", - tfJobImage: "gcr.io/kubeflow-images-public/tf_operator:kubeflow-tf-operator-postsubmit-v2-70cafb1-271-1911", + tfJobImage: "gcr.io/kubeflow-images-public/tf_operator:kubeflow-training-operator-postsubmit-v2-70cafb1-271-1911", tfJobUiServiceType: "ClusterIP", tfStatsdImage: "quay.io/datawire/statsd:0.30.1", usageId: "unknown_cluster", diff --git a/test/test-server/Makefile b/test/test-server/Makefile index 2020956558..50d5a33a41 100755 --- a/test/test-server/Makefile +++ b/test/test-server/Makefile @@ -15,7 +15,7 @@ # Requirements: # https://github.com/mattrobenolt/jinja2-cli # pip install jinja2-clie -IMG = gcr.io/kubeflow-images-staging/tf-operator-test-server +IMG = gcr.io/kubeflow-images-staging/training-operator-test-server DIR := ${CURDIR} diff --git a/test/workflows/environments/releasing/params.libsonnet b/test/workflows/environments/releasing/params.libsonnet index cf3bdc078d..5b8a4641d4 100644 --- a/test/workflows/environments/releasing/params.libsonnet +++ b/test/workflows/environments/releasing/params.libsonnet @@ -10,12 +10,12 @@ params + { workflows+: { bucket: 'kubeflow-releasing-artifacts', gcpCredentialsSecretName: 'gcp-credentials', - name: 'tf-operator-release-d746bde9-kunming', + name: 'training-operator-release-d746bde9-kunming', namespace: 'kubeflow-releasing', project: 'kubeflow-releasing', - prow_env: 
'JOB_NAME=tf-operator-release,JOB_TYPE=tf-operator-release,REPO_NAME=tf-operator,REPO_OWNER=kubeflow,BUILD_NUMBER=204B,PULL_BASE_SHA=d746bde9', + prow_env: 'JOB_NAME=training-operator-release,JOB_TYPE=training-operator-release,REPO_NAME=training-operator,REPO_OWNER=kubeflow,BUILD_NUMBER=204B,PULL_BASE_SHA=d746bde9', registry: 'gcr.io/kubeflow-images-public', - versionTag: 'kubeflow-tf-operator-postsubmit-v2-70cafb1-271-1911', + versionTag: 'kubeflow-training-operator-postsubmit-v2-70cafb1-271-1911', zone: 'us-central1-a', }, }, diff --git a/test/workflows/environments/test/params.libsonnet b/test/workflows/environments/test/params.libsonnet index 369745a8f0..9dddfc2adb 100644 --- a/test/workflows/environments/test/params.libsonnet +++ b/test/workflows/environments/test/params.libsonnet @@ -4,8 +4,8 @@ local envParams = params + { components+: { workflows+: { namespace: 'kubeflow-test-infra', - name: 'tf-operator-release-d746bde9-kunming', - prow_env: 'JOB_NAME=tf-operator-release,JOB_TYPE=tf-operator-release,REPO_NAME=tf-operator,REPO_OWNER=kubeflow,BUILD_NUMBER=01A3,PULL_BASE_SHA=d746bde9', + name: 'training-operator-release-d746bde9-kunming', + prow_env: 'JOB_NAME=training-operator-release,JOB_TYPE=training-operator-release,REPO_NAME=training-operator,REPO_OWNER=kubeflow,BUILD_NUMBER=01A3,PULL_BASE_SHA=d746bde9', versionTag: 'v20190702-d746bde9', registry: 'gcr.io/kubeflow-images-public', bucket: 'kubeflow-releasing-artifacts', diff --git a/third_party/library/license.txt b/third_party/library/license.txt index 2d377720c0..5623527697 100644 --- a/third_party/library/license.txt +++ b/third_party/library/license.txt @@ -1,5 +1,5 @@ -------------------------------------------------------------------------------- -kubeflow/tf-operator Apache License 2.0 https://github.com/kubeflow/tf-operator/blob/master/LICENSE +kubeflow/training-operator Apache License 2.0 https://github.com/kubeflow/training-operator/blob/master/LICENSE 
-------------------------------------------------------------------------------- Apache License Version 2.0, January 2004 diff --git a/third_party_licenses/dep.txt b/third_party_licenses/dep.txt index 1c60032cf4..7a9c790319 100644 --- a/third_party_licenses/dep.txt +++ b/third_party_licenses/dep.txt @@ -1,4 +1,4 @@ -kubeflow/tf-operator +kubeflow/training-operator cloud.google.com/go github.com/PuerkitoBio/purell github.com/PuerkitoBio/urlesc diff --git a/third_party_licenses/dep_repo.manual.csv b/third_party_licenses/dep_repo.manual.csv index 31bc9d6695..090c2b7ebc 100644 --- a/third_party_licenses/dep_repo.manual.csv +++ b/third_party_licenses/dep_repo.manual.csv @@ -1 +1 @@ -kubeflow/tf-operator,kubeflow/tf-operator +kubeflow/training-operator,kubeflow/training-operator diff --git a/third_party_licenses/license_info.csv b/third_party_licenses/license_info.csv index c04f0d0a1b..542dc47ce3 100644 --- a/third_party_licenses/license_info.csv +++ b/third_party_licenses/license_info.csv @@ -1,4 +1,4 @@ -kubeflow/tf-operator,https://github.com/kubeflow/tf-operator/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubeflow/tf-operator/master/LICENSE +kubeflow/training-operator,https://github.com/kubeflow/training-operator/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/kubeflow/training-operator/master/LICENSE GoogleCloudPlatform/gcloud-golang,https://github.com/googleapis/google-cloud-go/blob/master/LICENSE,Apache License 2.0,https://raw.githubusercontent.com/googleapis/google-cloud-go/master/LICENSE PuerkitoBio/purell,https://github.com/PuerkitoBio/purell/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/PuerkitoBio/purell/master/LICENSE PuerkitoBio/urlesc,https://github.com/PuerkitoBio/urlesc/blob/master/LICENSE,BSD 3-Clause "New" or "Revised" License,https://raw.githubusercontent.com/PuerkitoBio/urlesc/master/LICENSE diff --git a/third_party_licenses/repo.txt 
b/third_party_licenses/repo.txt index 9fc52b68b6..9337aa1303 100644 --- a/third_party_licenses/repo.txt +++ b/third_party_licenses/repo.txt @@ -1,4 +1,4 @@ -kubeflow/tf-operator +kubeflow/training-operator GoogleCloudPlatform/gcloud-golang PuerkitoBio/purell PuerkitoBio/urlesc diff --git a/vendor.go b/vendor.go index aa03d0dfbe..58b525806d 100644 --- a/vendor.go +++ b/vendor.go @@ -1,4 +1,4 @@ -// +build vendor +//go:build vendor package main @@ -7,7 +7,12 @@ package main // file from being included in builds. import ( - _ "k8s.io/code-generator" + _ "k8s.io/code-generator/cmd/client-gen" + _ "k8s.io/code-generator/cmd/deepcopy-gen" + _ "k8s.io/code-generator/cmd/defaulter-gen" + _ "k8s.io/code-generator/cmd/informer-gen" + _ "k8s.io/code-generator/cmd/lister-gen" + _ "k8s.io/code-generator/cmd/openapi-gen" ) func main() {} From 6ed9b09329dd33ade8f28f82c73dae38b6816cf8 Mon Sep 17 00:00:00 2001 From: avelichk Date: Wed, 6 Oct 2021 13:06:04 +0100 Subject: [PATCH 2/9] Generate SDK --- .gitignore | 3 +++ hack/python-sdk/gen-sdk.sh | 6 +++--- hack/python-sdk/swagger.json | 6 +++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index bebba6b4df..6331c1f5b8 100644 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,7 @@ examples/.ipynb_checkpoints/ # openapi-codegen tools and auto generated files but useless hack/python-sdk/openapi-generator-cli.jar +sdk/python/.openapi-generator +# Coverage +cover.out diff --git a/hack/python-sdk/gen-sdk.sh b/hack/python-sdk/gen-sdk.sh index 3b4339859f..ce92d46849 100755 --- a/hack/python-sdk/gen-sdk.sh +++ b/hack/python-sdk/gen-sdk.sh @@ -18,7 +18,7 @@ set -o errexit set -o nounset set -o pipefail -repo_root="$(realpath "$(dirname "$0")/../..")" +repo_root="$(dirname ${BASH_SOURCE})/../.." 
SWAGGER_JAR_URL="https://repo1.maven.org/maven2/org/openapitools/openapi-generator-cli/4.3.1/openapi-generator-cli-4.3.1.jar" SWAGGER_CODEGEN_JAR="${repo_root}/hack/python-sdk/openapi-generator-cli.jar" @@ -28,7 +28,7 @@ VERSION=1.3.0 SWAGGER_CODEGEN_FILE="${repo_root}/hack/python-sdk/swagger.json" if [ -z "${GOPATH:-}" ]; then - export GOPATH=$(go env GOPATH) + export GOPATH=$(go env GOPATH) fi echo "Generating OpenAPI specification ..." @@ -40,7 +40,7 @@ if [[ ! -f "$SWAGGER_CODEGEN_JAR" ]]; then fi echo "Generating swagger file ..." -go run "${repo_root}"/hack/python-sdk/main.go ${VERSION} > "${SWAGGER_CODEGEN_FILE}" +go run "${repo_root}"/hack/python-sdk/main.go ${VERSION} >"${SWAGGER_CODEGEN_FILE}" echo "Removing previously generated files ..." rm -rf "${SDK_OUTPUT_PATH}"/docs/V1*.md "${SDK_OUTPUT_PATH}"/kubeflow/training/models "${SDK_OUTPUT_PATH}"/kubeflow/training/*.py "${SDK_OUTPUT_PATH}"/test/*.py diff --git a/hack/python-sdk/swagger.json b/hack/python-sdk/swagger.json index 034164ca75..df1e89124f 100644 --- a/hack/python-sdk/swagger.json +++ b/hack/python-sdk/swagger.json @@ -1,8 +1,8 @@ { "swagger": "2.0", "info": { - "description": "Python SDK for Kubeflow Training", - "title": "Kubeflow Training SDK", + "description": "Python SDK for tensorflow", + "title": "tensorflow", "version": "v1.3.0" }, "paths": {}, @@ -473,4 +473,4 @@ } } } -} +} \ No newline at end of file From aff0508803a8fdafdeb2c515706f519986c17082 Mon Sep 17 00:00:00 2001 From: avelichk Date: Thu, 7 Oct 2021 21:18:28 +0100 Subject: [PATCH 3/9] Remove Travis --- sdk/python/.gitignore | 1 - sdk/python/.openapi-generator-ignore | 1 - 2 files changed, 2 deletions(-) diff --git a/sdk/python/.gitignore b/sdk/python/.gitignore index 08553371fd..24ada96808 100644 --- a/sdk/python/.gitignore +++ b/sdk/python/.gitignore @@ -67,7 +67,6 @@ target/ tox.ini test-requirements.txt git_push.sh -.travis.yml .swagger-codegen .swagger-codegen-ignore diff --git a/sdk/python/.openapi-generator-ignore 
b/sdk/python/.openapi-generator-ignore index c0ae241a85..ec649d33d5 100644 --- a/sdk/python/.openapi-generator-ignore +++ b/sdk/python/.openapi-generator-ignore @@ -30,4 +30,3 @@ setup.py .gitlab-ci.yml setup.cfg git_push.sh -.travis.yml From f5c5e43c38fb98b408c869ac6bbef2cc2d4056dd Mon Sep 17 00:00:00 2001 From: avelichk Date: Fri, 8 Oct 2021 12:35:33 +0100 Subject: [PATCH 4/9] Modify developer guide --- docs/development/developer_guide.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/development/developer_guide.md b/docs/development/developer_guide.md index e70a50dfaf..7466af6eb6 100644 --- a/docs/development/developer_guide.md +++ b/docs/development/developer_guide.md @@ -1,6 +1,6 @@ # Developer Guide -Tf-operator is currently at v1. +Kubeflow Training Operator is currently at v1. ## Requirements @@ -15,7 +15,7 @@ mkdir -p ${go env GOPATH}/src/github.com/kubeflow ln -sf ${GIT_TRAINING} ${go env GOPATH}/src/github.com/kubeflow/training-operator ``` -* GIT_TRAINING should be the location where you checked out https://github.com/kubeflow/training-operator +- GIT_TRAINING should be the location where you checked out https://github.com/kubeflow/training-operator Install dependencies @@ -54,7 +54,7 @@ export KUBECONFIG=$(echo ~/.kube/config) export KUBEFLOW_NAMESPACE=$(your_namespace) ``` -* KUBEFLOW_NAMESPACE is used when deployed on Kubernetes, we use this variable to create other resources (e.g. the resource lock) internal in the same namespace. It is optional, use `default` namespace if not set. +- KUBEFLOW_NAMESPACE is used when deployed on Kubernetes, we use this variable to create other resources (e.g. the resource lock) internal in the same namespace. It is optional, use `default` namespace if not set. 
### Create the TFJob CRD @@ -87,11 +87,14 @@ On ubuntu the default go package appears to be gccgo-go which has problems see [ ## Generate Python SDK To generate Python SDK for the operator, run: + ``` ./hack/python-sdk/gen-sdk.sh ``` + This command will re-generate the api and model files together with the documentation and model tests. The following files/folders in `sdk/python` are auto-generated and should not be modified directly: + ``` docs kubeflow/training/models @@ -103,15 +106,15 @@ test/*.py ### Python -* Use [yapf](https://github.com/google/yapf) to format Python code -* `yapf` style is configured in `.style.yapf` file -* To autoformat code +- Use [yapf](https://github.com/google/yapf) to format Python code +- `yapf` style is configured in `.style.yapf` file +- To autoformat code ```sh yapf -i py/**/*.py ``` -* To sort imports +- To sort imports ```sh isort path/to/module.py From fdc86d13f77f952881a5ae414ef3cc875fb4431b Mon Sep 17 00:00:00 2001 From: avelichk Date: Sat, 9 Oct 2021 00:43:58 +0100 Subject: [PATCH 5/9] Fix go mod --- README.md | 2 +- go.mod | 7 +------ go.sum | 6 +++--- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index a43e205251..d9426ce56f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Kubeflow Training Operator [![Build Status](https://github.com/kubeflow/training-operator/actions/workflows/test-go.yaml/badge.svg?branch=master)](https://github.com/kubeflow/training-operator/actions/workflows/test-go.yaml?branch=master) -[![Coverage Status](https://coveralls.io/repos/github/kubeflow/training-operator/badge.svg?branch=master)](https://coveralls.io/github/kubeflow/tf-operator?branch=master) +[![Coverage Status](https://coveralls.io/repos/github/kubeflow/training-operator/badge.svg?branch=master)](https://coveralls.io/github/kubeflow/training-operator?branch=master) [![Go Report 
Card](https://goreportcard.com/badge/github.com/kubeflow/training-operator)](https://goreportcard.com/report/github.com/kubeflow/training-operator) ## Overview diff --git a/go.mod b/go.mod index bef4066bc3..58f089d78a 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,6 @@ require ( github.com/prometheus/client_golang v1.10.0 github.com/sirupsen/logrus v1.6.0 k8s.io/api v0.19.9 - k8s.io/apiextensions-apiserver v0.19.9 // indirect k8s.io/apimachinery v0.19.9 k8s.io/client-go v0.19.9 k8s.io/code-generator v0.19.9 @@ -82,12 +81,8 @@ require ( gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 // indirect -<<<<<<< HEAD - k8s.io/component-base v0.19.9 // indirect -======= k8s.io/apiextensions-apiserver v0.19.2 // indirect - k8s.io/component-base v0.19.6 // indirect ->>>>>>> 948d6cfd (Update links and corresponding files for training-operator repository) + k8s.io/component-base v0.19.9 // indirect k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14 // indirect k8s.io/klog/v2 v2.2.0 // indirect k8s.io/utils v0.0.0-20200912215256-4140de9c8800 // indirect diff --git a/go.sum b/go.sum index 82d9a61ee3..538a3a95fa 100644 --- a/go.sum +++ b/go.sum @@ -805,16 +805,14 @@ k8s.io/api v0.19.2/go.mod h1:IQpK0zFQ1xc5iNIQPqzgoOwuFugaYHK4iCknlAQP9nI= k8s.io/api v0.19.6/go.mod h1:Plxx44Nh4zVblkJrIgxVPgPre1mvng6tXf1Sj3bs0fU= k8s.io/api v0.19.9 h1:Bs6ihik0V+4WDO9eFWrCNwhSUg7dW/Y+HflDCpOfNGk= k8s.io/api v0.19.9/go.mod h1:RcFj+riKQ1fAITdmtA6InI3LVEeKi+9LuvU7GVMeXJI= +k8s.io/apiextensions-apiserver v0.19.2 h1:oG84UwiDsVDu7dlsGQs5GySmQHCzMhknfhFExJMz9tA= k8s.io/apiextensions-apiserver v0.19.2/go.mod h1:EYNjpqIAvNZe+svXVx9j4uBaVhTB4C94HkY3w058qcg= -k8s.io/apiextensions-apiserver v0.19.9 h1:0y++UDtHnHYI+mcE1pa283mRM8wjimwp7qidhz4W/Yw= -k8s.io/apiextensions-apiserver v0.19.9/go.mod h1:guuV2OxCNFYOFWW4087BTGOzwuoy3TnvhicJb7DJE5U= k8s.io/apimachinery v0.19.2/go.mod 
h1:DnPGDnARWFvYa3pMHgSxtbZb7gpzzAZ1pTfaUNDVlmA= k8s.io/apimachinery v0.19.6/go.mod h1:6sRbGRAVY5DOCuZwB5XkqguBqpqLU6q/kOaOdk29z6Q= k8s.io/apimachinery v0.19.9 h1:ToBt9RaKt5BYD2uHNgom2O0MqvfXnEYimcxqCzZIkXA= k8s.io/apimachinery v0.19.9/go.mod h1:6sRbGRAVY5DOCuZwB5XkqguBqpqLU6q/kOaOdk29z6Q= k8s.io/apiserver v0.19.2/go.mod h1:FreAq0bJ2vtZFj9Ago/X0oNGC51GfubKK/ViOKfVAOA= k8s.io/apiserver v0.19.6/go.mod h1:05XquZxCDzQ27ebk7uV2LrFIK4lm5Yt47XkkUvLAoAM= -k8s.io/apiserver v0.19.9/go.mod h1:g6zpD+kcZFlO73pktPWRvL0tUGqj7/KaOowmRk8EpGg= k8s.io/client-go v0.19.2/go.mod h1:S5wPhCqyDNAlzM9CnEdgTGV4OqhsW3jGO1UM1epwfJA= k8s.io/client-go v0.19.6/go.mod h1:gEiS+efRlXYUEQ9Oz4lmNXlxAl5JZ8y2zbTDGhvXXnk= k8s.io/client-go v0.19.9 h1:Bs0ZnQOWnRYvOlAsT7tDro1j1B6ZaVX/O3C/k6EoaGE= @@ -825,6 +823,8 @@ k8s.io/code-generator v0.19.9 h1:nj1gVb/4P4C53hnBtdTaxZDlJ1jEkrQnAy+n4BYGVHs= k8s.io/code-generator v0.19.9/go.mod h1:lwEq3YnLYb/7uVXLorOJfxg+cUu2oihFhHZ0n9NIla0= k8s.io/component-base v0.19.2/go.mod h1:g5LrsiTiabMLZ40AR6Hl45f088DevyGY+cCE2agEIVo= k8s.io/component-base v0.19.6/go.mod h1:8Btsf8J00/fVDa/YFmXjei7gVkcFrlKZXjSeP4SZNJg= +k8s.io/component-base v0.19.9 h1:GOjvFCDgTRfLz6v3xshO0QbqWJN5nAkJzypc2BIfxOw= +k8s.io/component-base v0.19.9/go.mod h1:x9UmpImvXgVry1s9/hINgLz6iGBYUGvy3Xm7KZh1nnI= k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14 h1:t4L10Qfx/p7ASH3gXCdIUtPbbIuegCoUJf3TMSFekjw= k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= From 835eb4d3a64e6fe6fdd4dd37586dfc114c36e2fa Mon Sep 17 00:00:00 2001 From: avelichk Date: Sat, 9 Oct 2021 00:49:41 +0100 Subject: [PATCH 6/9] Fix legacy docs --- docs/monitoring/README.md | 35 ++++++++++++++++++------- docs/quick-start-v1.md | 51 ++++++++++++++++++------------------ docs/testing/e2e_testing.md | 28 ++++++++++++-------- scripts/setup-tf-operator.sh | 4 +-- 4 files changed, 70 insertions(+), 48 
deletions(-) diff --git a/docs/monitoring/README.md b/docs/monitoring/README.md index 68a80d1a63..bc5e4ac9d3 100644 --- a/docs/monitoring/README.md +++ b/docs/monitoring/README.md @@ -1,91 +1,106 @@ -# Prometheus Monitoring for TF operator +# Prometheus Monitoring for TFJob ## Available Metrics Currently available metrics to monitor are listed below. -### Metrics for Each Component Container for TF operator +### Metrics for Each Component Container for TFJob Component Containers: -* tf-operator -* tf-chief -* tf-ps -* tf-worker + +- tf-operator +- tf-chief +- tf-ps +- tf-worker #### Each Container Reports on its: Use prometheus graph to run the following example commands to visualize metrics. -*Note*: These metrics are derived from [cAdvisor](https://github.com/google/cadvisor) kubelet integration which reports to Prometheus through our prometheus-operator installation. You may see a complete list of metrics available in `\metrics` page of your Prometheus web UI which you can further use to compose your own queries. +_Note_: These metrics are derived from [cAdvisor](https://github.com/google/cadvisor) kubelet integration which reports to Prometheus through our prometheus-operator installation. You may see a complete list of metrics available in `\metrics` page of your Prometheus web UI which you can further use to compose your own queries. 
**CPU usage** + ``` sum (rate (container_cpu_usage_seconds_total{pod_name=~"tfjob-name-.*"}[1m])) by (pod_name) ``` **GPU Usage** + ``` sum (rate (container_accelerator_memory_used_bytes{pod_name=~"tfjob-name-.*"}[1m])) by (pod_name) ``` **Memory Usage** + ``` sum (rate (container_memory_usage_bytes{pod_name=~"tfjob-name-.*"}[1m])) by (pod_name) ``` **Network Usage** + ``` sum (rate (container_network_transmit_bytes_total{pod_name=~"tfjob-name-.*"}[1m])) by (pod_name) ``` **I/O Usage** + ``` sum (rate (container_fs_write_seconds_total{pod_name=~"tfjob-name-.*"}[1m])) by (pod_name) ``` -**Keep-Alive check** +**Keep-Alive check** + ``` up ``` + This is maintained by Prometheus on its own with its `up` metric detailed in the documentation [here](https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series). **Is Leader check** + ``` tf_operator_is_leader ``` -*Note*: Replace `tfjob-name` with your own TF Job name you want to monitor for the example queries above. +_Note_: Replace `tfjob-name` with your own TF Job name you want to monitor for the example queries above. ### Report TFJob metrics: -*Note*: If you are using release v1 tf-operator, these TFJob metrics don't have suffix `total`. So you have to use metric name like `tf_operator_jobs_created` to get your metrics. See [PR](https://github.com/kubeflow/training-operator/pull/1055) to get more information. +_Note_: If you are using release v1 tf-operator, these TFJob metrics don't have suffix `total`. So you have to use metric name like `tf_operator_jobs_created` to get your metrics. See [PR](https://github.com/kubeflow/training-operator/pull/1055) to get more information. 
**Job Creation** + ``` tf_operator_jobs_created_total ``` **Job Creation** + ``` sum (rate (tf_operator_jobs_created_total[60m])) ``` **Job Deletion** + ``` tf_operator_jobs_deleted_total ``` **Successful Job Completions** + ``` tf_operator_jobs_successful_total ``` **Failed Jobs** + ``` tf_operator_jobs_failed_total ``` **Restarted Jobs** + ``` tf_operator_jobs_restarted_total ``` diff --git a/docs/quick-start-v1.md b/docs/quick-start-v1.md index 5e565fd4de..6cff613eac 100644 --- a/docs/quick-start-v1.md +++ b/docs/quick-start-v1.md @@ -1,6 +1,7 @@ # Testing v1 -Tf-operator is currently in v1. The quick start shows an example of v1 of TF operator. For more details please refer to [developer_guide.md](../developer_guide.md). +TFJob is currently in v1. The quick start shows an example of TFJob. +For more details please refer to [developer_guide.md](../developer_guide.md). ## Create a TFJob @@ -38,12 +39,12 @@ spec: creationTimestamp: null spec: containers: - - image: kubeflow/tf-dist-mnist-test:1.0 - name: tensorflow - ports: - - containerPort: 2222 - name: tfjob-port - resources: {} + - image: kubeflow/tf-dist-mnist-test:1.0 + name: tensorflow + ports: + - containerPort: 2222 + name: tfjob-port + resources: {} Worker: replicas: 4 restartPolicy: Never @@ -52,26 +53,26 @@ spec: creationTimestamp: null spec: containers: - - image: kubeflow/tf-dist-mnist-test:1.0 - name: tensorflow - ports: - - containerPort: 2222 - name: tfjob-port - resources: {} + - image: kubeflow/tf-dist-mnist-test:1.0 + name: tensorflow + ports: + - containerPort: 2222 + name: tfjob-port + resources: {} status: conditions: - - lastTransitionTime: 2019-03-06T09:50:36Z - lastUpdateTime: 2019-03-06T09:50:36Z - message: TFJob dist-mnist-for-e2e-test is created. - reason: TFJobCreated - status: "True" - type: Created - - lastTransitionTime: 2019-03-06T09:50:57Z - lastUpdateTime: 2019-03-06T09:50:57Z - message: TFJob dist-mnist-for-e2e-test is running. 
- reason: TFJobRunning - status: "True" - type: Running + - lastTransitionTime: 2019-03-06T09:50:36Z + lastUpdateTime: 2019-03-06T09:50:36Z + message: TFJob dist-mnist-for-e2e-test is created. + reason: TFJobCreated + status: "True" + type: Created + - lastTransitionTime: 2019-03-06T09:50:57Z + lastUpdateTime: 2019-03-06T09:50:57Z + message: TFJob dist-mnist-for-e2e-test is running. + reason: TFJobRunning + status: "True" + type: Running replicaStatuses: PS: active: 2 diff --git a/docs/testing/e2e_testing.md b/docs/testing/e2e_testing.md index 14389bac65..b715c73c3d 100644 --- a/docs/testing/e2e_testing.md +++ b/docs/testing/e2e_testing.md @@ -1,6 +1,6 @@ -# How to Write an E2E Test for TF Operator +# How to Write an E2E Test for Kubeflow Training Operator -The E2E tests for TF operator are implemented as Argo workflows. For more background and details +The E2E tests for Kubeflow Training operator are implemented as Argo workflows. For more background and details about Argo (not required for understanding the rest of this document), please take a look at [this link](https://github.com/kubeflow/testing/blob/master/README.md). @@ -8,11 +8,11 @@ Test results can be monitored at the [Prow dashboard](https://prow.k8s.io/?repo= At a high level, the E2E test suites are structured as Python test classes. Each test class contains one or more tests. A test typically runs the following: -* Create a ksonnet component using a TFJob spec; -* Creates the specified TFJob; -* Verifies some expected results (e.g. number of pods started, job status); -* Deletes the TFJob. +- Create a ksonnet component using a TFJob spec; +- Creates the specified TFJob; +- Verifies some expected results (e.g. number of pods started, job status); +- Deletes the TFJob. ## Adding a Test Method @@ -23,11 +23,12 @@ starting or deleting a TFJob), and performs verifications of expected results (e correct status, pods are deleted, etc). 
Test classes should follow this pattern: + ```python class MyTest(test_util.TestCase): def __init__(self, args): # Initialize environment - + def test_case_1(self): # Test code @@ -40,9 +41,10 @@ if __name__ == "__main__" The code here ideally should only contain API calls. Any common functionalities used by the test code should be added to one of the helper modules: -* k8s_util - for K8s operations like querying/deleting a pod -* ks_util - for ksonnet operations -* tf_job_client - for TFJob-specific operations, such as waiting for the job to be in a certain phase + +- k8s_util - for K8s operations like querying/deleting a pod +- ks_util - for ksonnet operations +- tf_job_client - for TFJob-specific operations, such as waiting for the job to be in a certain phase ## Adding a TFJob Spec @@ -50,7 +52,7 @@ This is needed if you want to use your own TFJob spec instead of an existing one [here](https://github.com/kubeflow/training-operator/tree/master/test/workflows/components/simple_tfjob_v1.jsonnet). All TFJob specs should be placed in the same directory. -These are similar to actual TFJob specs. Note that many of these are using the +These are similar to actual TFJob specs. Note that many of these are using the [training-operator-test-server](https://github.com/kubeflow/training-operator/tree/master/test/test-server) as the test image. This gives us more control over when each replica exits, and allows us to send specific requests like fetching the runtime TensorFlow config. @@ -64,6 +66,7 @@ New test classes should be added as Argo workflow steps to the [workflows.libsonnet](https://github.com/kubeflow/training-operator/blob/master/test/workflows/components/workflows.libsonnet) file. Under the templates section, add the following to the dag: + ``` { name: "my-test", @@ -71,12 +74,15 @@ Under the templates section, add the following to the dag: dependencies: ["setup-kubeflow"], }, ``` + This will configure Argo to run `my-test` after setting up the Kubeflow cluster. 
Next, add the following lines toward the end of the file: + ``` $.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTestTemplate( "my-test"), ``` + This assumes that there is a corresponding Python file named `my_test.py` (note the difference between dashes and underscores). diff --git a/scripts/setup-tf-operator.sh b/scripts/setup-tf-operator.sh index 95cbe19593..8b07d59c92 100755 --- a/scripts/setup-tf-operator.sh +++ b/scripts/setup-tf-operator.sh @@ -30,11 +30,11 @@ GO_DIR=${GOPATH}/src/github.com/${REPO_OWNER}/${REPO_NAME} echo "Configuring kubeconfig.." aws eks update-kubeconfig --region=${REGION} --name=${CLUSTER_NAME} -echo "Update tf operator manifest with new name $REGISTRY and tag $VERSION" +echo "Update Training Operator manifest with new name $REGISTRY and tag $VERSION" cd manifests/overlays/standalone kustomize edit set image public.ecr.aws/j1r0q0g6/training/training-operator=${REGISTRY}:${VERSION} -echo "Installing tf operator manifests" +echo "Installing Training Operator manifests" kustomize build . 
| kubectl apply -f - TIMEOUT=30 From f6909b77b103b69876e83dfc1aa390487ee8fdcc Mon Sep 17 00:00:00 2001 From: avelichk Date: Sat, 9 Oct 2021 15:13:21 +0100 Subject: [PATCH 7/9] Generate SDK --- hack/python-sdk/swagger.json | 6 +++--- sdk/python/.travis.yml | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 sdk/python/.travis.yml diff --git a/hack/python-sdk/swagger.json b/hack/python-sdk/swagger.json index df1e89124f..034164ca75 100644 --- a/hack/python-sdk/swagger.json +++ b/hack/python-sdk/swagger.json @@ -1,8 +1,8 @@ { "swagger": "2.0", "info": { - "description": "Python SDK for tensorflow", - "title": "tensorflow", + "description": "Python SDK for Kubeflow Training", + "title": "Kubeflow Training SDK", "version": "v1.3.0" }, "paths": {}, @@ -473,4 +473,4 @@ } } } -} \ No newline at end of file +} diff --git a/sdk/python/.travis.yml b/sdk/python/.travis.yml new file mode 100644 index 0000000000..d78ce60018 --- /dev/null +++ b/sdk/python/.travis.yml @@ -0,0 +1,17 @@ +# ref: https://docs.travis-ci.com/user/languages/python +language: python +python: + - "2.7" + - "3.2" + - "3.3" + - "3.4" + - "3.5" + - "3.6" + - "3.7" + - "3.8" +# command to install dependencies +install: + - "pip install -r requirements.txt" + - "pip install -r test-requirements.txt" +# command to run tests +script: pytest --cov=kubeflow.training From f41382391cb6ed6aea66a9a819dc270c9d56814f Mon Sep 17 00:00:00 2001 From: avelichk Date: Sat, 9 Oct 2021 15:16:21 +0100 Subject: [PATCH 8/9] Remove travis --- sdk/python/.gitignore | 2 +- sdk/python/.travis.yml | 17 ----------------- 2 files changed, 1 insertion(+), 18 deletions(-) delete mode 100644 sdk/python/.travis.yml diff --git a/sdk/python/.gitignore b/sdk/python/.gitignore index 24ada96808..132d1c359c 100644 --- a/sdk/python/.gitignore +++ b/sdk/python/.gitignore @@ -67,6 +67,6 @@ target/ tox.ini test-requirements.txt git_push.sh +.travis.yml .swagger-codegen .swagger-codegen-ignore - diff --git 
a/sdk/python/.travis.yml b/sdk/python/.travis.yml deleted file mode 100644 index d78ce60018..0000000000 --- a/sdk/python/.travis.yml +++ /dev/null @@ -1,17 +0,0 @@ -# ref: https://docs.travis-ci.com/user/languages/python -language: python -python: - - "2.7" - - "3.2" - - "3.3" - - "3.4" - - "3.5" - - "3.6" - - "3.7" - - "3.8" -# command to install dependencies -install: - - "pip install -r requirements.txt" - - "pip install -r test-requirements.txt" -# command to run tests -script: pytest --cov=kubeflow.training From 36a6e7e3c9d2344540c556845e2103988469e307 Mon Sep 17 00:00:00 2001 From: avelichk Date: Sat, 9 Oct 2021 15:26:00 +0100 Subject: [PATCH 9/9] Update ignore files --- .gitignore | 1 - sdk/python/.openapi-generator-ignore | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6331c1f5b8..196374f778 100644 --- a/.gitignore +++ b/.gitignore @@ -40,7 +40,6 @@ examples/.ipynb_checkpoints/ # openapi-codegen tools and auto generated files but useless hack/python-sdk/openapi-generator-cli.jar -sdk/python/.openapi-generator # Coverage cover.out diff --git a/sdk/python/.openapi-generator-ignore b/sdk/python/.openapi-generator-ignore index ec649d33d5..c0ae241a85 100644 --- a/sdk/python/.openapi-generator-ignore +++ b/sdk/python/.openapi-generator-ignore @@ -30,3 +30,4 @@ setup.py .gitlab-ci.yml setup.cfg git_push.sh +.travis.yml