From 09f81eaae310ab1a591dab7e84dde1285d33b169 Mon Sep 17 00:00:00 2001 From: deepak-muley Date: Tue, 14 Sep 2021 11:09:40 -0700 Subject: [PATCH 1/5] 1399: added pod matching label in service selector --- manifests/base/service.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/base/service.yaml b/manifests/base/service.yaml index e95f9f4727..2dd6f7c6c7 100644 --- a/manifests/base/service.yaml +++ b/manifests/base/service.yaml @@ -7,7 +7,7 @@ metadata: prometheus.io/scrape: "true" prometheus.io/port: "8443" labels: - app: training-operator + control-plane: kubeflow-training-operator name: training-operator spec: ports: From 77b58a3f2d503019b4e2d330463ce5038ad958d4 Mon Sep 17 00:00:00 2001 From: deepak-muley Date: Tue, 14 Sep 2021 11:24:29 -0700 Subject: [PATCH 2/5] correct fix --- manifests/base/service.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manifests/base/service.yaml b/manifests/base/service.yaml index 2dd6f7c6c7..3ccd5c36c9 100644 --- a/manifests/base/service.yaml +++ b/manifests/base/service.yaml @@ -7,7 +7,7 @@ metadata: prometheus.io/scrape: "true" prometheus.io/port: "8443" labels: - control-plane: kubeflow-training-operator + app: training-operator name: training-operator spec: ports: @@ -15,5 +15,5 @@ spec: port: 8443 targetPort: 8443 selector: - name: training-operator + control-plane: kubeflow-training-operator type: ClusterIP From c6d6414ec3373416143ad932d15cd287d6be29ea Mon Sep 17 00:00:00 2001 From: deepak-muley Date: Tue, 14 Sep 2021 12:40:25 -0700 Subject: [PATCH 3/5] 2010: fix to expose correct monitoring port training-operator exposes 8080 per logs 2021-09-14T19:31:49.294Z INFO controller-runtime.metrics metrics server is starting to listen {"addr": ":8080"} hence when we do local testing, it works fine, but when we deploy it using make deploy, the port is not exposed inside k8s --- manifests/base/deployment.yaml | 2 ++ manifests/base/service.yaml | 4 ++-- 2 files changed, 4 
insertions(+), 2 deletions(-) diff --git a/manifests/base/deployment.yaml b/manifests/base/deployment.yaml index 416d5ebf5e..536b782dea 100644 --- a/manifests/base/deployment.yaml +++ b/manifests/base/deployment.yaml @@ -21,6 +21,8 @@ spec: - /manager image: kubeflow/training-operator name: training-operator + ports: + - containerPort: 8080 env: - name: MY_POD_NAMESPACE valueFrom: diff --git a/manifests/base/service.yaml b/manifests/base/service.yaml index 3ccd5c36c9..78a0871a8c 100644 --- a/manifests/base/service.yaml +++ b/manifests/base/service.yaml @@ -12,8 +12,8 @@ metadata: spec: ports: - name: monitoring-port - port: 8443 - targetPort: 8443 + port: 8080 + targetPort: 8080 selector: control-plane: kubeflow-training-operator type: ClusterIP From 3e6467542c010d6500fc62ecf3a1f559429a4fb8 Mon Sep 17 00:00:00 2001 From: deepak-muley Date: Tue, 14 Sep 2021 12:44:33 -0700 Subject: [PATCH 4/5] reverting fix from other fix-branch --- manifests/base/service.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/base/service.yaml b/manifests/base/service.yaml index 78a0871a8c..7ac9e3046f 100644 --- a/manifests/base/service.yaml +++ b/manifests/base/service.yaml @@ -15,5 +15,5 @@ spec: port: 8080 targetPort: 8080 selector: - control-plane: kubeflow-training-operator + name: training-operator type: ClusterIP From 80355563deea04f35057d1068a11d0c3b93e34f6 Mon Sep 17 00:00:00 2001 From: deepak-muley Date: Tue, 14 Sep 2021 12:49:24 -0700 Subject: [PATCH 5/5] now prometheus will scrape on 8080 --- manifests/base/service.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/base/service.yaml b/manifests/base/service.yaml index 7ac9e3046f..bfc94be622 100644 --- a/manifests/base/service.yaml +++ b/manifests/base/service.yaml @@ -5,7 +5,7 @@ metadata: annotations: prometheus.io/path: /metrics prometheus.io/scrape: "true" - prometheus.io/port: "8443" + prometheus.io/port: "8080" labels: app: training-operator name: 
training-operator