diff --git a/chart-infra/templates/cleanup-operator-roles.yaml b/chart-infra/templates/cleanup-operator-roles.yaml new file mode 100644 index 00000000..83735601 --- /dev/null +++ b/chart-infra/templates/cleanup-operator-roles.yaml @@ -0,0 +1,45 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cleanup-operator +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: cleanup-operator +rules: +- apiGroups: [""] + resources: + - pods + verbs: + - get + - list + - watch + - delete +- apiGroups: ["batch", "extensions"] + resources: + - jobs + verbs: + - get + - list + - watch + - delete +- apiGroups: ["apps"] + resources: + - deployments + - deployments/scale + verbs: + - get + - patch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: cleanup-operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cleanup-operator +subjects: +- kind: ServiceAccount + name: cleanup-operator \ No newline at end of file diff --git a/chart-infra/templates/kube-cleanup-operator-deployment.yaml b/chart-infra/templates/kube-cleanup-operator-deployment.yaml deleted file mode 100644 index 772273f1..00000000 --- a/chart-infra/templates/kube-cleanup-operator-deployment.yaml +++ /dev/null @@ -1,92 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - run: cleanup-operator - sandboxId: "{{ .Values.sandboxId }}" - name: cleanup-operator - namespace: {{.Release.Namespace}} -spec: - replicas: 1 - selector: - matchLabels: - run: cleanup-operator - sandboxId: "{{ .Values.sandboxId }}" - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - template: - metadata: - labels: - run: cleanup-operator - sandboxId: "{{ .Values.sandboxId }}" - spec: - serviceAccountName: cleanup-operator - containers: - - args: - - --namespace={{.Release.Namespace}} - # delete orphaned pods after 1s, so successfully run - # workers wont linger around - - --legacy-mode=false - - --delete-orphaned-pods-after=1s - image: quay.io/lwolf/kube-cleanup-operator - imagePullPolicy: Always - name: cleanup-operator - resources: - requests: - cpu: 50m - memory: 50Mi - limits: - cpu: 50m - memory: 50Mi - dnsPolicy: ClusterFirst - restartPolicy: Always - terminationGracePeriodSeconds: 30 ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: cleanup-operator ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: cleanup-operator -rules: -- apiGroups: [""] - resources: - - pods - verbs: - - get - - list - - watch - - delete -- apiGroups: ["batch", "extensions"] - resources: - - jobs - verbs: - - get - - list - - watch - - delete -- apiGroups: ["apps"] - resources: - - deployments - - deployments/scale - verbs: - - get - - patch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: cleanup-operator -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: cleanup-operator -subjects: -- kind: ServiceAccount - name: cleanup-operator \ No newline at end of file diff --git a/chart-infra/templates/kubernetes-event-exporter-config.yaml b/chart-infra/templates/kubernetes-event-exporter-config.yaml new file mode 100644 index 00000000..4ebe30d1 --- /dev/null +++ b/chart-infra/templates/kubernetes-event-exporter-config.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: event-exporter-cfg + namespace: {{.Release.Namespace}} +data: + config.yaml: | + logLevel: info + logFormat: json + route: + routes: + - match: + - kind: "Pod" + receiver: "cleanup" + receivers: + - name: "cleanup" + webhook: + endpoint: "https://api-{{ .Values.sandboxId }}.scp-staging.biomage.net/v1/kubernetesEvents" + headers: + User-Agent: kube-event-exporter 1.0 + diff --git a/chart-infra/templates/kubernetes-event-exporter-deployment.yaml b/chart-infra/templates/kubernetes-event-exporter-deployment.yaml new file mode 100644 index 00000000..dbb17903 --- /dev/null +++ b/chart-infra/templates/kubernetes-event-exporter-deployment.yaml @@ -0,0 +1,31 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: event-exporter + namespace: {{ .Release.Namespace }} +spec: + replicas: 1 + template: + metadata: + labels: + app: event-exporter + version: v1 + spec: + serviceAccountName: event-exporter + containers: + - name: event-exporter + image: ghcr.io/opsgenie/kubernetes-event-exporter:v0.10 + imagePullPolicy: IfNotPresent + args: + - -conf=/data/config.yaml + volumeMounts: + - mountPath: /data + name: cfg + volumes: + - name: cfg + configMap: + name: event-exporter-cfg + selector: + matchLabels: + app: event-exporter + version: v1 \ No newline at end of file diff --git a/chart-infra/templates/kubernetes-event-exporter-roles.yaml b/chart-infra/templates/kubernetes-event-exporter-roles.yaml new file mode 100644 index 00000000..2f8a30bb --- /dev/null +++ b/chart-infra/templates/kubernetes-event-exporter-roles.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: {{.Release.Namespace}} + name: event-exporter +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: event-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: view +subjects: + - kind: ServiceAccount + namespace: {{.Release.Namespace}} + name: event-exporter \ No newline at end of file diff --git a/pipeline-runner/R/gem2s-7-upload_to_aws.R b/pipeline-runner/R/gem2s-7-upload_to_aws.R index f718ff32..d08dfc60 100644 --- a/pipeline-runner/R/gem2s-7-upload_to_aws.R +++ b/pipeline-runner/R/gem2s-7-upload_to_aws.R @@ -29,6 +29,7 @@ upload_to_aws <- function(input, pipeline_config, prev_out) { fpath <- file.path(tempdir(), 'experiment.rds') saveRDS(scdata, fpath, compress = FALSE) + # can only upload up to 50Gb because part numbers can be any number from 1 to 10,000, inclusive. put_object_in_s3_multipart(pipeline_config, bucket = pipeline_config$source_bucket, object = fpath, diff --git a/pipeline-runner/R/handle_data.R b/pipeline-runner/R/handle_data.R index 28bf7732..6ce3da21 100644 --- a/pipeline-runner/R/handle_data.R +++ b/pipeline-runner/R/handle_data.R @@ -37,6 +37,7 @@ send_output_to_api <- function(pipeline_config, input, plot_data_keys, output) { msg <- list( experimentId = input$experimentId, + taskName = input$taskName, input = input, output = list( bucket = pipeline_config$results_bucket, diff --git a/pipeline-runner/init.R b/pipeline-runner/init.R index 80d02e7b..afc14a32 100644 --- a/pipeline-runner/init.R +++ b/pipeline-runner/init.R @@ -346,7 +346,7 @@ init <- function() { cause = error_txt ) - send_pipeline_fail_update(pipeline_config, input_parse$experimentId, input_parse$processName, "Error message placeholder") + send_pipeline_fail_update(pipeline_config, input_parse$experimentId, input_parse$processName, error_txt) message("Sent task failure to state machine task: ", taskToken) message("recovered from error:", e$message)