Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ROX-15242 - Reduce fleetshard and fleet-manager log noise #785

Merged
merged 2 commits into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ internal_image_registry:=image-registry.openshift-image-registry.svc:5000
test_image:=test/$(IMAGE_NAME)

DOCKER ?= docker
DOCKER_CONFIG ?= "${PWD}/.docker"
DOCKER_CONFIG ?= "${HOME}/.docker"

# Default Variables
ACSMS_NAMESPACE ?= acsms
Expand Down
2 changes: 2 additions & 0 deletions fleetshard/pkg/central/reconciler/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ func (r *CentralReconciler) Reconcile(ctx context.Context, remoteCentral private
return nil, ErrCentralNotChanged
}

glog.Infof("Start reconcile central %s/%s", remoteCentral.Metadata.Namespace, remoteCentral.Metadata.Name)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think logging here will still result in many log messages, should be after line 113 to have effect.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And one more thing here, is it ok to log remoteCentral.Metadata.Name? It's customer data, and I remember that when I wanted to tag the DB instances with the name, we decided not to do it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure about if we should remove the name, but adding the ID would be good since this is most important to filter for logs of a specific tenant.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The namespace already contains the Id, but it would be better to replace Namespace with Id.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Convinced. I'll create a follow-up PR to this to keep the changes small.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Structured logging would be great now :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Follow up PR: #856

Copy link
Contributor

@vladbologa vladbologa Mar 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think logging here will still result in many log messages, should be after line 113 to have effect.

Looks like my initial comment was forgotten because of the subsequent namespace / ID discussion.

I'd still move this log to after line 113, so that we avoid logging "Starting reconcile..." for every tenant, every 5-10 seconds. WDYT?

remoteCentralName := remoteCentral.Metadata.Name
remoteCentralNamespace := remoteCentral.Metadata.Namespace

Expand Down
7 changes: 5 additions & 2 deletions fleetshard/pkg/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/stackrox/acs-fleet-manager/fleetshard/pkg/k8s"
"github.com/stackrox/acs-fleet-manager/internal/dinosaur/pkg/api/private"
"github.com/stackrox/acs-fleet-manager/pkg/client/fleetmanager"
"github.com/stackrox/acs-fleet-manager/pkg/logger"
"github.com/stackrox/rox/pkg/concurrency"
"k8s.io/apimachinery/pkg/util/wait"
ctrlClient "sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -29,6 +30,8 @@ import (
// TODO(SimonBaeumer): set a unique identifier for the map key, currently the instance name is used
type reconcilerRegistry map[string]*centralReconciler.CentralReconciler

var reconciledCentralCountCache int32

var backoff = wait.Backoff{
Duration: 1 * time.Second,
Factor: 1.5,
Expand Down Expand Up @@ -135,7 +138,8 @@ func (r *Runtime) Start() error {
}

// Start for each Central its own reconciler which can be triggered by sending a central to the receive channel.
glog.Infof("Received %d centrals", len(list.Items))
reconciledCentralCountCache = int32(len(list.Items))
logger.InfoChangedInt32(&reconciledCentralCountCache, "Received central count changed: received %d centrals", reconciledCentralCountCache)
for _, central := range list.Items {
if _, ok := r.reconcilers[central.Id]; !ok {
r.reconcilers[central.Id] = centralReconciler.NewCentralReconciler(r.k8sClient, central,
Expand All @@ -151,7 +155,6 @@ func (r *Runtime) Start() error {
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
defer cancel()

glog.Infof("Start reconcile central %s/%s", central.Metadata.Namespace, central.Metadata.Name)
status, err := reconciler.Reconcile(ctx, central)
fleetshardmetrics.MetricsInstance().IncCentralReconcilations()
r.handleReconcileResult(central, status, err)
Expand Down
17 changes: 10 additions & 7 deletions internal/dinosaur/pkg/workers/clusters_mgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package workers

import (
"fmt"
"strings"
"sync"

dinosaurConstants "github.com/stackrox/acs-fleet-manager/internal/dinosaur/constants"
"github.com/stackrox/acs-fleet-manager/internal/dinosaur/pkg/clusters/types"
Expand All @@ -12,9 +14,6 @@ import (
"github.com/stackrox/acs-fleet-manager/pkg/constants"
"github.com/stackrox/acs-fleet-manager/pkg/logger"

"strings"
"sync"

"github.com/goava/di"
"github.com/google/go-cmp/cmp"
"github.com/google/uuid"
Expand Down Expand Up @@ -59,6 +58,10 @@ const (
clusterAdminRoleName = "cluster-admin"
)

var (
readyClusterCount int32
)

var clusterMetricsStatuses = []api.ClusterStatus{
api.ClusterAccepted,
api.ClusterProvisioning,
Expand Down Expand Up @@ -322,9 +325,9 @@ func (c *ClusterManager) processReadyClusters() []error {
errs = append(errs, errors.Wrap(listErr, "failed to list ready clusters"))
return errs
}
if len(readyClusters) > 0 {
glog.V(10).Infof("ready clusters count = %d", len(readyClusters))
}

readyClusterCount = int32(len(readyClusters))
logger.InfoChangedInt32(&readyClusterCount, "ready clusters count = %d", readyClusterCount)

for _, readyCluster := range readyClusters {
emptyClusterReconciled := false
Expand Down Expand Up @@ -472,9 +475,9 @@ func (c *ClusterManager) reconcileClusterInstanceType(cluster api.Cluster) error
if err != nil {
return errors.Wrapf(err, "failed to update instance type in database for cluster %s", cluster.ClusterID)
}
logger.Logger.V(10).Infof("supported instance type for cluster = %s successful updated", cluster.ClusterID)
}

logger.Logger.V(10).Infof("supported instance type for cluster = %s successful updated", cluster.ClusterID)
return nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ func (k *AcceptedCentralManager) Stop() {
k.StopWorker(k)
}

var acceptedCentralCount int32

// Reconcile ...
func (k *AcceptedCentralManager) Reconcile() []error {
var encounteredErrors []error
Expand All @@ -68,9 +70,9 @@ func (k *AcceptedCentralManager) Reconcile() []error {
if serviceErr != nil {
encounteredErrors = append(encounteredErrors, errors.Wrap(serviceErr, "failed to list accepted centrals"))
}
if len(acceptedCentralRequests) > 0 {
glog.Infof("accepted centrals count = %d", len(acceptedCentralRequests))
}

acceptedCentralCount = int32(len(acceptedCentralRequests))
logger.InfoChangedInt32(&acceptedCentralCount, "accepted centrals count = %d", acceptedCentralCount)

for _, centralRequest := range acceptedCentralRequests {
glog.V(10).Infof("accepted central id = %s", centralRequest.ID)
Expand Down
17 changes: 6 additions & 11 deletions internal/dinosaur/pkg/workers/dinosaurmgrs/ready_dinosaurs_mgr.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
package dinosaurmgrs

import (
"github.com/golang/glog"
"github.com/google/uuid"
"github.com/pkg/errors"
constants2 "github.com/stackrox/acs-fleet-manager/internal/dinosaur/constants"
"github.com/stackrox/acs-fleet-manager/internal/dinosaur/pkg/services"
"github.com/stackrox/acs-fleet-manager/pkg/client/iam"
"github.com/stackrox/acs-fleet-manager/pkg/logger"
"github.com/stackrox/acs-fleet-manager/pkg/metrics"
"github.com/stackrox/acs-fleet-manager/pkg/services/sso"
"github.com/stackrox/acs-fleet-manager/pkg/workers"
)

const readyCentralWorkerType = "ready_dinosaur"

var readyCentralCountCache int32

// ReadyDinosaurManager represents a dinosaur manager that periodically reconciles dinosaur requests
type ReadyDinosaurManager struct {
workers.BaseWorker
Expand Down Expand Up @@ -49,21 +51,14 @@ func (k *ReadyDinosaurManager) Stop() {

// Reconcile ...
func (k *ReadyDinosaurManager) Reconcile() []error {

var encounteredErrors []error

readyDinosaurs, serviceErr := k.dinosaurService.ListByStatus(constants2.CentralRequestStatusReady)
readyCentrals, serviceErr := k.dinosaurService.ListByStatus(constants2.CentralRequestStatusReady)
if serviceErr != nil {
encounteredErrors = append(encounteredErrors, errors.Wrap(serviceErr, "failed to list ready centrals"))
}
if len(readyDinosaurs) > 0 {
glog.Infof("ready centrals count = %d", len(readyDinosaurs))
}

for _, dinosaur := range readyDinosaurs {
glog.V(10).Infof("ready central id = %s", dinosaur.ID)
// TODO implement reconciliation logic for ready dinosaurs
}
readyCentralCountCache = int32(len(readyCentrals))
logger.InfoChangedInt32(&readyCentralCountCache, "ready centrals count = %d", readyCentralCountCache)

return encounteredErrors
}
13 changes: 13 additions & 0 deletions pkg/logger/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"context"
"fmt"
"strings"
"sync"

sentry "github.com/getsentry/sentry-go"

Expand Down Expand Up @@ -189,6 +190,18 @@ func (l *logger) InfoDepth(depth int, args ...interface{}) {
glog.InfoDepth(logDepth+depth, args...)
}

var cache = sync.Map{}

// InfoChangedInt32 logs whenever the value of the passed counter reference changes when it is called.
func InfoChangedInt32(counter *int32, msg string, args ...interface{}) {
val, loaded := cache.LoadOrStore(counter, *counter)
if loaded && val == *counter {
return // counter not changed, do not log
}

glog.InfoDepth(logDepth, fmt.Sprintf(msg, args...))
}

// Warningf ...
func (l *logger) Warningf(format string, args ...interface{}) {
prefixed := l.prepareLogPrefix(format, args...)
Expand Down