Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't use old hardcoded PD API key secret #107

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion hack/olm-registry/olm-artifacts-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ objects:
sourceType: grpc
image: ${REGISTRY_IMG}:${CHANNEL}-${IMAGE_TAG}
displayName: pagerduty-operator Registry
publisher: SRE
publisher: SRE

- apiVersion: operators.coreos.com/v1alpha2
kind: OperatorGroup
Expand Down Expand Up @@ -67,3 +67,20 @@ objects:
targetSecretRef:
name: pd-secret
namespace: openshift-monitoring

- apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: pagerduty-integration-api-secret
namespace: pagerduty-operator
spec:
groups:
- name: pagerduty-integration-api-secret
rules:
- alert: PagerDutyIntegrationAPISecretError
annotations:
message: PagerDuty Operator is failing to load PAGERDUTY_API_KEY from Secret specified in PagerDutyIntegration {{ $labels.pagerdutyintegration_name }}. Either the Secret might be missing, or the key might be missing from within the Secret.
expr: pagerdutyintegration_secret_loaded < 1
for: 15m
labels:
severity: warning
10 changes: 5 additions & 5 deletions pkg/controller/pagerdutyintegration/clusterdeployment_created.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

func (r *ReconcilePagerDutyIntegration) handleCreate(pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
func (r *ReconcilePagerDutyIntegration) handleCreate(pdclient pd.Client, pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
var (
// secretName is the name of the Secret deployed to the target
// cluster, and also the name of the SyncSet that causes it to
Expand Down Expand Up @@ -96,15 +96,15 @@ func (r *ReconcilePagerDutyIntegration) handleCreate(pdi *pagerdutyv1alpha1.Page
err = pdData.ParseClusterConfig(r.client, cd.Namespace, configMapName)
if err != nil {
var createErr error
_, createErr = r.pdclient.CreateService(pdData)
_, createErr = pdclient.CreateService(pdData)
if createErr != nil {
localmetrics.UpdateMetricPagerDutyCreateFailure(1, ClusterID)
localmetrics.UpdateMetricPagerDutyCreateFailure(1, ClusterID, pdi.Name)
return createErr
}
}
localmetrics.UpdateMetricPagerDutyCreateFailure(0, ClusterID)
localmetrics.UpdateMetricPagerDutyCreateFailure(0, ClusterID, pdi.Name)

pdIntegrationKey, err = r.pdclient.GetIntegrationKey(pdData)
pdIntegrationKey, err = pdclient.GetIntegrationKey(pdData)
if err != nil {
return err
}
Expand Down
19 changes: 15 additions & 4 deletions pkg/controller/pagerdutyintegration/clusterdeployment_deleted.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (
"k8s.io/apimachinery/pkg/types"
)

func (r *ReconcilePagerDutyIntegration) handleDelete(pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
func (r *ReconcilePagerDutyIntegration) handleDelete(pdclient pd.Client, pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
var (
// secretName is the name of the Secret deployed to the target
// cluster, and also the name of the SyncSet that causes it to
Expand Down Expand Up @@ -119,7 +119,7 @@ func (r *ReconcilePagerDutyIntegration) handleDelete(pdi *pagerdutyv1alpha1.Page

if deletePDService {
// we have everything necessary to attempt deletion of the PD service
err = r.pdclient.DeleteService(pdData)
err = pdclient.DeleteService(pdData)
if err != nil {
r.reqLogger.Error(err, "Failed cleaning up pagerduty.")
} else {
Expand Down Expand Up @@ -155,11 +155,22 @@ func (r *ReconcilePagerDutyIntegration) handleDelete(pdi *pagerdutyv1alpha1.Page
utils.DeleteFinalizer(cd, finalizer)
err = r.client.Update(context.TODO(), cd)
if err != nil {
metrics.UpdateMetricPagerDutyDeleteFailure(1, ClusterID)
metrics.UpdateMetricPagerDutyDeleteFailure(1, ClusterID, pdi.Name)
return err
}
}
metrics.UpdateMetricPagerDutyDeleteFailure(0, ClusterID)

if utils.HasFinalizer(cd, config.OperatorFinalizer) {
r.reqLogger.Info("Deleting old PD finalizer from ClusterDeployment", "Namespace", cd.Namespace, "Name", cd.Name)
utils.DeleteFinalizer(cd, config.OperatorFinalizer)
err = r.client.Update(context.TODO(), cd)
if err != nil {
metrics.UpdateMetricPagerDutyDeleteFailure(1, ClusterID, pdi.Name)
return err
}
}

metrics.UpdateMetricPagerDutyDeleteFailure(0, ClusterID, pdi.Name)

return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,29 +52,16 @@ var log = logf.Log.WithName("controller_pagerdutyintegration")
// Add creates a new PagerDutyIntegration Controller and adds it to the Manager. The Manager will set fields on the Controller
// and Start it when the Manager is Started.
func Add(mgr manager.Manager) error {
newRec, err := newReconciler(mgr)
if err != nil {
return err
}

return add(mgr, newRec)
return add(mgr, newReconciler(mgr))
}

// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager) (reconcile.Reconciler, error) {
tempClient, err := client.New(mgr.GetConfig(), client.Options{Scheme: mgr.GetScheme()})
if err != nil {
return nil, err
}

// get PD API key from secret
pdAPIKey, err := utils.LoadSecretData(tempClient, config.PagerDutyAPISecretName, config.OperatorNamespace, config.PagerDutyAPISecretKey)

func newReconciler(mgr manager.Manager) reconcile.Reconciler {
return &ReconcilePagerDutyIntegration{
client: utils.NewClientWithMetricsOrDie(log, mgr, controllerName),
scheme: mgr.GetScheme(),
pdclient: pd.NewClient(pdAPIKey),
}, nil
pdclient: pd.NewClient,
}
}

// add adds a new Controller to mgr with r as the reconcile.Reconciler
Expand Down Expand Up @@ -159,7 +146,7 @@ type ReconcilePagerDutyIntegration struct {
client client.Client
scheme *runtime.Scheme
reqLogger logr.Logger
pdclient pd.Client
pdclient func(APIKey string) pd.Client
}

// Reconcile reads that state of the cluster for a PagerDutyIntegration object and makes changes based on the state read
Expand Down Expand Up @@ -198,15 +185,31 @@ func (r *ReconcilePagerDutyIntegration) Reconcile(request reconcile.Request) (re
return r.requeueOnErr(err)
}

pdApiKey, err := utils.LoadSecretData(
r.client,
pdi.Spec.PagerdutyApiKeySecretRef.Name,
pdi.Spec.PagerdutyApiKeySecretRef.Namespace,
config.PagerDutyAPISecretKey,
)
if err != nil {
r.reqLogger.Error(err, "Failed to load PagerDuty API key from Secret listed in PagerDutyIntegration CR")
localmetrics.UpdateMetricPagerDutyIntegrationSecretLoaded(0, pdi.Name)
return r.requeueAfter(10 * time.Minute)
}
localmetrics.UpdateMetricPagerDutyIntegrationSecretLoaded(1, pdi.Name)
pdClient := r.pdclient(pdApiKey)

if pdi.DeletionTimestamp != nil {
if utils.HasFinalizer(pdi, config.OperatorFinalizer) {
for _, cd := range matchingClusterDeployments.Items {
err := r.handleDelete(pdi, &cd)
err := r.handleDelete(pdClient, pdi, &cd)
if err != nil {
return r.requeueOnErr(err)
}
}

localmetrics.DeleteMetricPagerDutyIntegrationSecretLoaded(pdi.Name)

utils.DeleteFinalizer(pdi, config.OperatorFinalizer)
err = r.client.Update(context.TODO(), pdi)
if err != nil {
Expand All @@ -226,12 +229,12 @@ func (r *ReconcilePagerDutyIntegration) Reconcile(request reconcile.Request) (re

for _, cd := range matchingClusterDeployments.Items {
if cd.DeletionTimestamp != nil || cd.Labels[config.ClusterDeploymentNoalertsLabel] == "true" {
err := r.handleDelete(pdi, &cd)
err := r.handleDelete(pdClient, pdi, &cd)
if err != nil {
return r.requeueOnErr(err)
}
} else {
err := r.handleCreate(pdi, &cd)
err := r.handleCreate(pdClient, pdi, &cd)
if err != nil {
return r.requeueOnErr(err)
}
Expand Down Expand Up @@ -259,3 +262,7 @@ func (r *ReconcilePagerDutyIntegration) doNotRequeue() (reconcile.Result, error)
func (r *ReconcilePagerDutyIntegration) requeueOnErr(err error) (reconcile.Result, error) {
return reconcile.Result{}, err
}

func (r *ReconcilePagerDutyIntegration) requeueAfter(t time.Duration) (reconcile.Result, error) {
return reconcile.Result{RequeueAfter: t}, nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
pagerdutyapis "github.com/openshift/pagerduty-operator/pkg/apis"
pagerdutyv1alpha1 "github.com/openshift/pagerduty-operator/pkg/apis/pagerduty/v1alpha1"
"github.com/openshift/pagerduty-operator/pkg/kube"
pd "github.com/openshift/pagerduty-operator/pkg/pagerduty"
mockpd "github.com/openshift/pagerduty-operator/pkg/pagerduty/mock"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -359,6 +360,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
localObjects: []runtime.Object{
uninstalledClusterDeployment(),
testPagerDutyIntegration(),
testPDConfigSecret(),
},
expectedSyncSets: &SyncSetEntry{},
expectedSecrets: &SecretEntry{},
Expand Down Expand Up @@ -426,6 +428,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
localObjects: []runtime.Object{
unlabelledClusterDeployment(),
testPagerDutyIntegration(),
testPDConfigSecret(),
},
expectedSyncSets: &SyncSetEntry{},
expectedSecrets: &SecretEntry{},
Expand All @@ -439,6 +442,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
localObjects: []runtime.Object{
testNoalertsClusterDeployment(),
testPagerDutyIntegration(),
testPDConfigSecret(),
},
expectedSyncSets: &SyncSetEntry{},
expectedSecrets: &SecretEntry{},
Expand Down Expand Up @@ -502,7 +506,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
rpdi := &ReconcilePagerDutyIntegration{
client: mocks.fakeKubeClient,
scheme: scheme.Scheme,
pdclient: mocks.mockPDClient,
pdclient: func(s string) pd.Client { return mocks.mockPDClient },
}

// Act [2x as first exits early after setting finalizer]
Expand Down Expand Up @@ -556,7 +560,7 @@ func TestRemoveAlertsAfterCreate(t *testing.T) {
rpdi := &ReconcilePagerDutyIntegration{
client: mocks.fakeKubeClient,
scheme: scheme.Scheme,
pdclient: mocks.mockPDClient,
pdclient: func(s string) pd.Client { return mocks.mockPDClient },
}

// Act (create) [2x as first exits early after setting finalizer]
Expand Down Expand Up @@ -636,7 +640,7 @@ func TestDeleteSecret(t *testing.T) {
rpdi := &ReconcilePagerDutyIntegration{
client: mocks.fakeKubeClient,
scheme: scheme.Scheme,
pdclient: mocks.mockPDClient,
pdclient: func(s string) pd.Client { return mocks.mockPDClient },
}

// Act (create) [2x as first exits early after setting finalizer]
Expand Down
50 changes: 39 additions & 11 deletions pkg/localmetrics/localmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ var (
Name: "pagerduty_create_failure",
Help: "Metric for the failure of creating a cluster deployment.",
ConstLabels: prometheus.Labels{"name": "pagerduty-operator"},
}, []string{"clusterdeployment_name"})
}, []string{"clusterdeployment_name", "pagerdutyintegration_name"})

MetricPagerDutyDeleteFailure = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "pagerduty_delete_failure",
Help: "Metric for the failure of deleting a cluster deployment.",
ConstLabels: prometheus.Labels{"name": "pagerduty-operator"},
}, []string{"clusterdeployment_name"})
}, []string{"clusterdeployment_name", "pagerdutyintegration_name"})

MetricPagerDutyHeartbeat = prometheus.NewSummary(prometheus.SummaryOpts{
Name: "pagerduty_heartbeat",
Expand All @@ -69,16 +69,23 @@ var (
Buckets: []float64{1},
}, []string{"controller", "method", "resource", "status"})

MetricPagerDutyIntegrationSecretLoaded = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "pagerdutyintegration_secret_loaded",
Help: "Metric to track the ability to load the PagerDuty API key from the Secret specified in the PagerDutyIntegration",
ConstLabels: prometheus.Labels{"name": "pagerduty-operator"},
}, []string{"pagerdutyintegration_name"})

MetricsList = []prometheus.Collector{
MetricPagerDutyCreateFailure,
MetricPagerDutyDeleteFailure,
MetricPagerDutyHeartbeat,
ApiCallDuration,
ReconcileDuration,
MetricPagerDutyIntegrationSecretLoaded,
}
)

// UpdateAPIMetrics updates all API endpoint metrics ever 5 minutes
// UpdateAPIMetrics updates all API endpoint metrics every 5 minutes
func UpdateAPIMetrics(APIKey string, timer *prometheus.Timer) {
d := time.Tick(5 * time.Minute)
for range d {
Expand All @@ -87,18 +94,39 @@ func UpdateAPIMetrics(APIKey string, timer *prometheus.Timer) {

}

// UpdateMetricPagerDutyCreateFailure updates guage to 1 when creation fails
func UpdateMetricPagerDutyCreateFailure(x int, cd string) {
// UpdateMetricPagerDutyIntegrationSecretLoaded updates gauge to 1
// when the PagerDuty API key can be loaded from the Secret specified
// in the PagerDutyIntegration, or to 0 if it fails
func UpdateMetricPagerDutyIntegrationSecretLoaded(x int, pdiName string) {
MetricPagerDutyIntegrationSecretLoaded.With(
prometheus.Labels{"pagerdutyintegration_name": pdiName},
).Set(float64(x))
}

// DeleteMetricPagerDutyIntegrationSecretLoaded deletes the metric for
// the PagerDutyIntegration name provided. This should be called when
// e.g. the PagerDutyIntegration is being deleted, so that there are
// no irrelevant metrics (which alerts could be firing on).
func DeleteMetricPagerDutyIntegrationSecretLoaded(pdiName string) bool {
return MetricPagerDutyIntegrationSecretLoaded.Delete(
prometheus.Labels{"pagerdutyintegration_name": pdiName},
)
}

// UpdateMetricPagerDutyCreateFailure updates gauge to 1 when creation fails
func UpdateMetricPagerDutyCreateFailure(x int, cd string, pdiName string) {
MetricPagerDutyCreateFailure.With(prometheus.Labels{
"clusterdeployment_name": cd}).Set(
float64(x))
"clusterdeployment_name": cd,
"pagerdutyintegration_name": pdiName,
}).Set(float64(x))
}

// UpdateMetricPagerDutyDeleteFailure updates guage to 1 when deletion fails
func UpdateMetricPagerDutyDeleteFailure(x int, cd string) {
// UpdateMetricPagerDutyDeleteFailure updates gauge to 1 when deletion fails
func UpdateMetricPagerDutyDeleteFailure(x int, cd string, pdiName string) {
MetricPagerDutyDeleteFailure.With(prometheus.Labels{
"clusterdeployment_name": cd}).Set(
float64(x))
"clusterdeployment_name": cd,
"pagerdutyintegration_name": pdiName,
}).Set(float64(x))
}

// SetReconcileDuration Push the duration of the reconcile iteration
Expand Down