Skip to content
This repository has been archived by the owner on Sep 19, 2022. It is now read-only.

PyTorch Operator: Move manifests development upstream #320

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions manifests/pytorch-job-crds/base/crd.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# CustomResourceDefinition for the namespaced PyTorchJob kind (group kubeflow.org).
# NOTE(review): apiextensions.k8s.io/v1beta1 is no longer served from
# Kubernetes v1.22 onwards; moving to apiextensions.k8s.io/v1 needs a
# structural schema per version, so the API version is left unchanged here.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: pytorchjobs.kubeflow.org
spec:
  # Extra columns printed by `kubectl get pytorchjobs`.
  additionalPrinterColumns:
    # Type of the last entry in .status.conditions.
    - JSONPath: .status.conditions[-1:].type
      name: State
      type: string
    - JSONPath: .metadata.creationTimestamp
      name: Age
      type: date
  group: kubeflow.org
  names:
    kind: PyTorchJob
    plural: pytorchjobs
    singular: pytorchjob
  scope: Namespaced
  # Enables the /status subresource.
  subresources:
    status: {}
  validation:
    openAPIV3Schema:
      properties:
        spec:
          properties:
            pytorchReplicaSpecs:
              properties:
                # Master replica count is pinned to exactly 1.
                Master:
                  properties:
                    replicas:
                      maximum: 1
                      minimum: 1
                      type: integer
                # Worker replica count must be at least 1 when set.
                Worker:
                  properties:
                    replicas:
                      minimum: 1
                      type: integer
  versions:
    - name: v1
      served: true
      storage: true
4 changes: 4 additions & 0 deletions manifests/pytorch-job-crds/base/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Kustomize base for the PyTorchJob CRD package; its only resource is the
# CRD manifest next to this file.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - crd.yaml
42 changes: 42 additions & 0 deletions manifests/pytorch-job-crds/overlays/application/application.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Application resource describing the pytorch-job-crds package.
apiVersion: app.k8s.io/v1beta1
kind: Application
metadata:
  name: pytorch-job-crds
spec:
  # Labels used to select the resources that belong to this application.
  selector:
    matchLabels:
      app.kubernetes.io/name: pytorch-job-crds
      app.kubernetes.io/instance: pytorch-job-crds-v0.7.0
      app.kubernetes.io/version: v0.7.0
      app.kubernetes.io/component: pytorch
      app.kubernetes.io/part-of: kubeflow
      app.kubernetes.io/managed-by: kfctl
  # Resource kinds listed as components of the application.
  componentKinds:
    - group: core
      kind: Service
    - group: apps
      kind: Deployment
    - group: core
      kind: ServiceAccount
    - group: kubeflow.org
      kind: PyTorchJob
  descriptor:
    type: "pytorch-job-crds"
    version: "v1"
    description: "Pytorch-job-crds contains the \"PyTorchJob\" custom resource definition."
    # NOTE(review): the e-mail values below appear to be an obfuscation
    # placeholder left by the page this manifest was captured from; they are
    # quoted so they stay strings (unquoted, `[...]` is a YAML flow sequence).
    # Restore the real addresses from the upstream manifest.
    maintainers:
      - name: Johnu George
        email: "[email protected]"
    owners:
      - name: Johnu George
        email: "[email protected]"
    keywords:
      - "pytorchjob"
      - "pytorch-operator"
      - "pytorch-training"
    links:
      - description: About
        url: "https://github.com/kubeflow/pytorch-operator"
      - description: Docs
        url: "https://www.kubeflow.org/docs/reference/pytorchjob/v1/pytorch/"
  addOwnerRef: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Overlay that layers the Application resource on top of the CRD base and
# stamps the common app.kubernetes.io labels onto every resource.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
# NOTE(review): `bases` is deprecated in newer kustomize releases in favour of
# listing the directory under `resources`; kept as-is here — confirm the
# kustomize version in use before changing it.
bases:
  - ../../base
commonLabels:
  app.kubernetes.io/component: pytorch
  app.kubernetes.io/name: pytorch-job-crds
resources:
  - application.yaml
13 changes: 13 additions & 0 deletions manifests/pytorch-operator/base/cluster-role-binding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Binds the pytorch-operator ClusterRole to the pytorch-operator ServiceAccount.
# Uses the GA rbac.authorization.k8s.io/v1 API (the v1beta1 RBAC API is no
# longer served from Kubernetes v1.22), matching the other RBAC objects that
# ship alongside this manifest.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app: pytorch-operator
  name: pytorch-operator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: pytorch-operator
subjects:
  # NOTE(review): no namespace is set on the subject; presumably it is filled
  # in by the kustomization's `namespace:` field at build time — confirm.
  - kind: ServiceAccount
    name: pytorch-operator
89 changes: 89 additions & 0 deletions manifests/pytorch-operator/base/cluster-role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# ClusterRole for the operator itself.
# Uses rbac.authorization.k8s.io/v1 to agree with the other ClusterRoles in
# this file (v1beta1 is no longer served from Kubernetes v1.22).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app: pytorch-operator
  name: pytorch-operator
rules:
  # All verbs on PyTorchJob objects and their status/finalizers subresources.
  - apiGroups:
      - kubeflow.org
    resources:
      - pytorchjobs
      - pytorchjobs/status
      - pytorchjobs/finalizers
    verbs:
      - '*'
  # All verbs on CustomResourceDefinitions.
  - apiGroups:
      - apiextensions.k8s.io
    resources:
      - customresourcedefinitions
    verbs:
      - '*'
  # All verbs on core-group ("") pods, services, endpoints and events.
  - apiGroups:
      - ""
    resources:
      - pods
      - services
      - endpoints
      - events
    verbs:
      - '*'
---

# Aggregated admin role: it carries no rules of its own and instead collects
# every ClusterRole labelled aggregate-to-kubeflow-pytorchjobs-admin="true".
# Its own label opts it into the kubeflow-admin aggregation.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeflow-pytorchjobs-admin
  labels:
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true"
aggregationRule:
  clusterRoleSelectors:
    - matchLabels:
        rbac.authorization.kubeflow.org/aggregate-to-kubeflow-pytorchjobs-admin: "true"
rules: []

---

# Edit role: read and write verbs on PyTorchJob objects and their
# status/finalizers subresources. The labels opt it into both the
# kubeflow-edit and the kubeflow-pytorchjobs-admin aggregations.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeflow-pytorchjobs-edit
  labels:
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-pytorchjobs-admin: "true"
rules:
  - apiGroups:
      - kubeflow.org
    resources:
      - pytorchjobs
      - pytorchjobs/status
      - pytorchjobs/finalizers
    verbs:
      - get
      - list
      - watch
      - create
      - delete
      - deletecollection
      - patch
      - update

---

# View role: read-only verbs (get/list/watch) on PyTorchJob objects and their
# status/finalizers subresources. The label opts it into the kubeflow-view
# aggregation.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeflow-pytorchjobs-view
  labels:
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
rules:
  - apiGroups:
      - kubeflow.org
    resources:
      - pytorchjobs
      - pytorchjobs/status
      - pytorchjobs/finalizers
    verbs:
      - get
      - list
      - watch
34 changes: 34 additions & 0 deletions manifests/pytorch-operator/base/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Single-replica Deployment that runs the pytorch-operator controller.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: pytorch-operator
spec:
  replicas: 1
  # Must match the pod template labels below.
  selector:
    matchLabels:
      name: pytorch-operator
  template:
    metadata:
      labels:
        name: pytorch-operator
      annotations:
        # NOTE(review): a reviewer asked why Istio sidecar injection is
        # disabled for the PyTorchJob controller; the rationale is not
        # recorded in this manifest.
        sidecar.istio.io/inject: "false"
    spec:
      containers:
        - command:
            - /pytorch-operator.v1
            - --alsologtostderr
            - -v=1
            # Same port number that service.yaml exposes as monitoring-port.
            - --monitoring-port=8443
          env:
            # Both variables are filled from the pod's own metadata.
            - name: MY_POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: MY_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          # The base kustomization.yaml has an `images` entry for this image
          # name that sets its own newTag.
          image: gcr.io/kubeflow-images-public/pytorch-operator:v0.6.0-18-g5e36a57
          name: pytorch-operator
      serviceAccountName: pytorch-operator
15 changes: 15 additions & 0 deletions manifests/pytorch-operator/base/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Kustomize base for the operator: RBAC, Deployment, ServiceAccount and
# Service, all placed in the kubeflow namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: kubeflow
resources:
  - cluster-role-binding.yaml
  - cluster-role.yaml
  - deployment.yaml
  - service-account.yaml
  - service.yaml
# Label added to every resource built from this base.
commonLabels:
  kustomize.component: pytorch-operator
# Image override: keeps the repository name and sets the tag, replacing the
# tag written in deployment.yaml.
images:
  - name: gcr.io/kubeflow-images-public/pytorch-operator
    newName: gcr.io/kubeflow-images-public/pytorch-operator
    newTag: vmaster-g518f9c76
3 changes: 3 additions & 0 deletions manifests/pytorch-operator/base/params.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# NOTE(review): a reviewer on this change suggested this file can be removed;
# the base kustomization.yaml shown with it does not reference params.env.
pytorchDefaultImage=null
deploymentScope=cluster
deploymentNamespace=null
6 changes: 6 additions & 0 deletions manifests/pytorch-operator/base/service-account.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# ServiceAccount referenced by the Deployment's serviceAccountName and by the
# ClusterRoleBinding subject of the same name.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: pytorch-operator
  name: pytorch-operator
19 changes: 19 additions & 0 deletions manifests/pytorch-operator/base/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# ClusterIP Service exposing the operator's monitoring port (8443).
apiVersion: v1
kind: Service
metadata:
  # prometheus.io/* scrape hints; values are quoted so they stay strings,
  # as annotation values must be.
  annotations:
    prometheus.io/path: /metrics
    prometheus.io/port: "8443"
    prometheus.io/scrape: "true"
  labels:
    app: pytorch-operator
  name: pytorch-operator
spec:
  ports:
    - name: monitoring-port
      port: 8443
      targetPort: 8443
  # Selects pods by the `name` label set in the Deployment's pod template.
  selector:
    name: pytorch-operator
  type: ClusterIP

44 changes: 44 additions & 0 deletions manifests/pytorch-operator/overlays/application/application.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Application resource describing the pytorch-operator package.
apiVersion: app.k8s.io/v1beta1
kind: Application
metadata:
  name: pytorch-operator
spec:
  # Labels used to select the resources that belong to this application.
  selector:
    matchLabels:
      app.kubernetes.io/name: pytorch-operator
      app.kubernetes.io/instance: pytorch-operator-v0.7.0
      app.kubernetes.io/version: v0.7.0
      app.kubernetes.io/component: pytorch
      app.kubernetes.io/part-of: kubeflow
      app.kubernetes.io/managed-by: kfctl
  # Resource kinds listed as components of the application.
  componentKinds:
    - group: core
      kind: Service
    - group: apps
      kind: Deployment
    - group: core
      kind: ConfigMap
    - group: core
      kind: ServiceAccount
    - group: kubeflow.org
      kind: PyTorchJob
  descriptor:
    type: "pytorch-operator"
    version: "v1"
    description: "Pytorch-operator allows users to create and manage the \"PyTorchJob\" custom resource."
    # NOTE(review): the e-mail values below appear to be an obfuscation
    # placeholder left by the page this manifest was captured from; they are
    # quoted so they stay strings (unquoted, `[...]` is a YAML flow sequence).
    # Restore the real addresses from the upstream manifest.
    maintainers:
      - name: Johnu George
        email: "[email protected]"
    owners:
      - name: Johnu George
        email: "[email protected]"
    keywords:
      - "pytorchjob"
      - "pytorch-operator"
      - "pytorch-training"
    links:
      - description: About
        url: "https://github.com/kubeflow/pytorch-operator"
      - description: Docs
        url: "https://www.kubeflow.org/docs/reference/pytorchjob/v1/pytorch/"
  addOwnerRef: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Overlay that layers the Application resource on top of the operator base
# and stamps the common app.kubernetes.io labels onto every resource.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
# NOTE(review): `bases` is deprecated in newer kustomize releases in favour of
# listing the directory under `resources`; kept as-is here — confirm the
# kustomize version in use before changing it.
bases:
  - ../../base
commonLabels:
  app.kubernetes.io/component: pytorch
  app.kubernetes.io/name: pytorch-operator
resources:
  - application.yaml