Skip to content

Commit

Permalink
Merge pull request #776 from ecordell/progressing-metrics
Browse files Browse the repository at this point in the history
chore(catalog): add ClusterOperator status for catalog operator
  • Loading branch information
openshift-merge-robot authored Mar 23, 2019
2 parents 2721a2c + bda51cd commit cebef53
Show file tree
Hide file tree
Showing 55 changed files with 1,381 additions and 3,244 deletions.
27 changes: 26 additions & 1 deletion cmd/catalog/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ import (
"strings"
"time"

configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorstatus"
"github.com/prometheus/client_golang/prometheus/promhttp"
log "github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/clientcmd"

"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/catalog"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/signals"
Expand All @@ -22,6 +26,7 @@ const (
defaultWakeupInterval = 15 * time.Minute
defaultCatalogNamespace = "openshift-operator-lifecycle-manager"
defaultConfigMapServerImage = "quay.io/operatorframework/configmap-operator-registry:latest"
defaultOperatorName = ""
)

// config flags defined globally so that they appear on the test binary as well
Expand All @@ -41,6 +46,9 @@ var (
configmapServerImage = flag.String(
"configmapServerImage", defaultConfigMapServerImage, "the image to use for serving the operator registry api for a configmap")

writeStatusName = flag.String(
"writeStatusName", defaultOperatorName, "ClusterOperator name in which to write status, set to \"\" to disable.")

debug = flag.Bool(
"debug", false, "use debug log level")

Expand Down Expand Up @@ -87,6 +95,17 @@ func main() {
}
logger.Infof("log level %s", logger.Level)

// create a config client for operator status
config, err := clientcmd.BuildConfigFromFlags("", *kubeConfigPath)
if err != nil {
log.Fatalf("error configuring client: %s", err.Error())
}
configClient, err := configv1client.NewForConfig(config)
if err != nil {
log.Fatalf("error configuring client: %s", err.Error())
}
opClient := operatorclient.NewClientFromConfig(*kubeConfigPath, logger)

// Create a new instance of the operator.
catalogOperator, err := catalog.NewOperator(*kubeConfigPath, logger, *wakeupInterval, *configmapServerImage, *catalogNamespace, namespaces...)
if err != nil {
Expand All @@ -96,6 +115,12 @@ func main() {
http.Handle("/metrics", promhttp.Handler())
go http.ListenAndServe(":8081", nil)

_, done, _ := catalogOperator.Run(stopCh)
ready, done, sync := catalogOperator.Run(stopCh)
<-ready

if *writeStatusName != "" {
operatorstatus.MonitorClusterStatus(*writeStatusName, sync, stopCh, opClient, configClient)
}

<-done
}
198 changes: 3 additions & 195 deletions cmd/olm/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,16 @@ import (
"fmt"
"net/http"
"os"
"reflect"
"strings"
"time"

configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorstatus"
"github.com/prometheus/client_golang/prometheus/promhttp"
log "github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/discovery"
"k8s.io/client-go/tools/clientcmd"

configv1 "github.com/openshift/api/config/v1"
configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
"github.com/operator-framework/operator-lifecycle-manager/pkg/api/client"
"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm"
Expand Down Expand Up @@ -132,194 +126,8 @@ func main() {
<-ready

if *writeStatusName != "" {
monitorClusterStatus(sync, stopCh, opClient, configClient)
operatorstatus.MonitorClusterStatus(*writeStatusName, sync, stopCh, opClient, configClient)
}

<-done
}

// monitorClusterStatus starts a background goroutine that reports this operator's health
// to the ClusterOperator resource named by *writeStatusName.
//
// The closure below is handed to wait.Until with a 5-second period and stopCh as the stop
// signal; monitorClusterStatus itself returns immediately because of the `go` statement.
// Each invocation of the closure:
//  1. checks (until it succeeds once) that the server serves config.openshift.io/v1,
//  2. blocks until a result arrives on syncCh and tallies total vs. successful syncs,
//  3. gets or creates the ClusterOperator object,
//  4. sets the Failing/Progressing/Available conditions and versions, and
//  5. calls UpdateStatus only when the status actually changed.
//
// syncCh carries one error value per sync (nil meaning success); opClient is used only for
// API discovery and configClient for reading/writing ClusterOperator objects.
func monitorClusterStatus(syncCh chan error, stopCh <-chan struct{}, opClient operatorclient.ClientInterface, configClient configv1client.ConfigV1Interface) {
// These counters are captured by the closure, so they persist and accumulate across
// every invocation made by wait.Until.
var (
syncs int
successfulSyncs int
hasClusterOperator bool
)
go wait.Until(func() {
// slow poll until we see a cluster operator API, which could be never
if !hasClusterOperator {
opStatusGV := schema.GroupVersion{
Group: "config.openshift.io",
Version: "v1",
}
err := discovery.ServerSupportsVersion(opClient.KubernetesInterface().Discovery(), opStatusGV)
if err != nil {
log.Infof("ClusterOperator api not present, skipping update (%v)", err)
// back off for a minute inside this invocation before wait.Until schedules the next one
time.Sleep(time.Minute)
return
}
// remember the positive result so discovery is not repeated on later invocations
hasClusterOperator = true
}

// Sample the sync channel and see whether we're successfully retiring syncs as a
// proxy for "working" (we can't know when we hit level, but we can at least verify
// we are seeing some syncs succeeding). Once we observe at least one successful
// sync we can begin reporting available and level.
// NOTE: this select has a single case and no default, so it blocks until a value is
// received from syncCh or the channel is closed.
select {
case err, ok := <-syncCh:
if !ok {
// syncCh should only close if the Run() loop exits
// give the process 5 seconds to exit on its own before forcing termination
time.Sleep(5 * time.Second)
log.Fatalf("Status sync channel closed but process did not exit in time")
}
syncs++
if err == nil {
successfulSyncs++
}
// grab any other sync events that have accumulated
for len(syncCh) > 0 {
if err := <-syncCh; err == nil {
successfulSyncs++
}
syncs++
}
// if we haven't yet accumulated enough syncs, wait longer
// TODO: replace these magic numbers with a better measure of syncs across all queueInformers
if successfulSyncs < 5 || syncs < 10 {
log.Printf("Waiting to observe more successful syncs")
return
}
}

// create the cluster operator in an initial state if it does not exist
existing, err := configClient.ClusterOperators().Get(*writeStatusName, metav1.GetOptions{})
if k8serrors.IsNotFound(err) {
log.Info("Existing operator status not found, creating")
// initial state: Progressing=True, Failing=False, Available=False
created, createErr := configClient.ClusterOperators().Create(&configv1.ClusterOperator{
ObjectMeta: metav1.ObjectMeta{
Name: *writeStatusName,
},
Status: configv1.ClusterOperatorStatus{
Conditions: []configv1.ClusterOperatorStatusCondition{
configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorProgressing,
Status: configv1.ConditionTrue,
Message: fmt.Sprintf("Installing %s", olmversion.OLMVersion),
LastTransitionTime: metav1.Now(),
},
configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorFailing,
Status: configv1.ConditionFalse,
LastTransitionTime: metav1.Now(),
},
configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorAvailable,
Status: configv1.ConditionFalse,
LastTransitionTime: metav1.Now(),
},
},
},
})
if createErr != nil {
log.Errorf("Failed to create cluster operator: %v\n", createErr)
return
}
// continue with the freshly created object and clear the NotFound error so the
// generic error check below does not abort this invocation
existing = created
err = nil
}
if err != nil {
log.Errorf("Unable to retrieve cluster operator: %v", err)
return
}

// update the status with the appropriate state
// snapshot the status first so we can detect below whether anything changed
previousStatus := existing.Status.DeepCopy()
switch {
case successfulSyncs > 0:
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorFailing,
Status: configv1.ConditionFalse,
})
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorProgressing,
Status: configv1.ConditionFalse,
Message: fmt.Sprintf("Deployed %s", olmversion.OLMVersion),
})
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorAvailable,
Status: configv1.ConditionTrue,
})
// we set the versions array when all the latest code is deployed and running - in this case,
// the sync method is responsible for guaranteeing that happens before it returns nil
if version := os.Getenv("RELEASE_VERSION"); len(version) > 0 {
existing.Status.Versions = []configv1.OperandVersion{
{
Name: "operator",
Version: version,
},
{
Name: "operator-lifecycle-manager",
Version: olmversion.OLMVersion,
},
}
} else {
existing.Status.Versions = nil
}
default:
// NOTE(review): this arm appears unreachable - the select above returns early unless
// successfulSyncs >= 5, so `successfulSyncs > 0` always holds by the time we get here.
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorFailing,
Status: configv1.ConditionTrue,
Message: "Waiting for updates to take effect",
})
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorProgressing,
Status: configv1.ConditionFalse,
Message: fmt.Sprintf("Waiting to see update %s succeed", olmversion.OLMVersion),
})
// TODO: use % errors within a window to report available
}

// update the status
// only write when something actually changed, to avoid redundant API calls
if !reflect.DeepEqual(previousStatus, &existing.Status) {
if _, err := configClient.ClusterOperators().UpdateStatus(existing); err != nil {
log.Errorf("Unable to update cluster operator status: %v", err)
}
}

// if we've reported success, we can sleep longer; otherwise we want to keep watching for
// successful syncs
if successfulSyncs > 0 {
time.Sleep(5 * time.Minute)
}

}, 5*time.Second, stopCh)
}

// setOperatorStatusCondition inserts newCondition into *conditions, or updates the existing
// entry of the same Type in place.
//
// If no condition of that Type is present, newCondition is appended with its
// LastTransitionTime set to the current time. If one is present, its Reason and Message
// are always overwritten, while Status and LastTransitionTime are touched only when the
// Status actually changes. On such a change the transition time is taken from
// newCondition when the caller supplied one, and otherwise defaults to the current time,
// so a caller that leaves the field unset no longer resets the timestamp to its zero value.
//
// A nil conditions pointer is a no-op: there is no slice to write the result back into.
func setOperatorStatusCondition(conditions *[]configv1.ClusterOperatorStatusCondition, newCondition configv1.ClusterOperatorStatusCondition) {
	if conditions == nil {
		return
	}

	existingCondition := findOperatorStatusCondition(*conditions, newCondition.Type)
	if existingCondition == nil {
		newCondition.LastTransitionTime = metav1.NewTime(time.Now())
		*conditions = append(*conditions, newCondition)
		return
	}

	if existingCondition.Status != newCondition.Status {
		existingCondition.Status = newCondition.Status
		// Record when the flip happened; fall back to "now" if the caller gave no timestamp.
		if newCondition.LastTransitionTime.IsZero() {
			existingCondition.LastTransitionTime = metav1.NewTime(time.Now())
		} else {
			existingCondition.LastTransitionTime = newCondition.LastTransitionTime
		}
	}

	existingCondition.Reason = newCondition.Reason
	existingCondition.Message = newCondition.Message
}

// findOperatorStatusCondition returns a pointer to the first element of conditions whose
// Type equals conditionType, or nil when no element matches. The returned pointer refers
// to the element inside the given slice, so writes through it modify that slice entry.
func findOperatorStatusCondition(conditions []configv1.ClusterOperatorStatusCondition, conditionType configv1.ClusterStatusConditionType) *configv1.ClusterOperatorStatusCondition {
	for idx := 0; idx < len(conditions); idx++ {
		candidate := &conditions[idx]
		if candidate.Type != conditionType {
			continue
		}
		return candidate
	}
	return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ spec:
{{- if .Values.catalog.commandArgs }}
- {{ .Values.catalog.commandArgs }}
{{- end }}
{{- if .Values.writeStatusNameCatalog }}
- -writeStatusName
- {{ .Values.writeStatusNameCatalog }}
{{- end }}
image: {{ .Values.catalog.image.ref }}
imagePullPolicy: {{ .Values.catalog.image.pullPolicy }}
ports:
Expand Down
9 changes: 9 additions & 0 deletions deploy/chart/templates/0000_50_olm_14-operatorstatus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,13 @@ status:
versions:
- name: operator
version: "0.0.1-snapshot"
---
apiVersion: config.openshift.io/v1
kind: ClusterOperator
metadata:
name: {{ .Values.writeStatusNameCatalog }}
status:
versions:
- name: operator
version: "0.0.1-snapshot"
{{- end }}
1 change: 1 addition & 0 deletions deploy/ocp/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ catalog_namespace: openshift-operator-lifecycle-manager
operator_namespace: openshift-operators
imagestream: true
writeStatusName: operator-lifecycle-manager
writeStatusNameCatalog: operator-lifecycle-manager-catalog
olm:
replicaCount: 1
image:
Expand Down
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ require (
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef // indirect
github.com/golang/mock v1.1.1
github.com/google/btree v1.0.0 // indirect
github.com/gregjones/httpcache v0.0.0-20181110185634-c63ab54fda8f // indirect
github.com/grpc-ecosystem/grpc-gateway v1.7.0 // indirect
github.com/json-iterator/go v1.1.6 // indirect
github.com/maxbrunsfeld/counterfeiter v0.0.0-20181017030959-1aadac120687
github.com/openshift/api v3.9.1-0.20190129160438-bbc4289c54e0+incompatible
github.com/openshift/client-go v0.0.0-20190128154758-1540772775fa
github.com/openshift/library-go v0.0.0-20190125204812-22b2ba2f485f
github.com/openshift/api v3.9.1-0.20190321190659-71fdeba18656+incompatible
github.com/openshift/client-go v0.0.0-20190313214351-8ae2a9c33ba2
github.com/operator-framework/operator-registry v1.0.6
github.com/pkg/errors v0.8.0
github.com/prometheus/client_golang v0.9.1
Expand All @@ -42,5 +42,5 @@ require (
k8s.io/klog v0.2.0 // indirect
k8s.io/kube-aggregator v0.0.0-20190223015803-f706565beac0
k8s.io/kube-openapi v0.0.0-20181031203759-72693cb1fadd
k8s.io/kubernetes v1.11.9-beta.0.0.20190311041124-ede55fd57298
k8s.io/kubernetes v1.11.9-beta.0.0.20190321231218-16236ce91790
)
Loading

0 comments on commit cebef53

Please sign in to comment.