From 1300142b65746feedb64e20126050cfcd4ea8c7c Mon Sep 17 00:00:00 2001 From: Yoni Bettan Date: Wed, 19 May 2021 15:10:11 +0000 Subject: [PATCH] MGMT-6437: Redirecting cluster metrics to the correct Telemeter server. Currently all clusters send metrics to the default Telemeter server which is the prod instance of Telemeter in the cloud. This is wrong for 2 reasons: 1. integration/stage cluster statistics shouldn't be a part of production dashboards. 2. integration/stage service is configured to work against integration/stage AMS; therefore, for those clusters, when metrics reach prod Telemeter, they will fail on authN anyway. This change makes sure each env either sends its metrics to the correct server or has them disabled. Signed-off-by: Yoni Bettan --- cmd/main.go | 3 +- internal/cluster/cluster.go | 5 ++ internal/cluster/cluster_test.go | 39 ++++++++++ internal/network/manifests_generator.go | 66 +++++++++++++++- internal/network/manifests_generator_test.go | 81 +++++++++++++++++++- internal/network/mock_manifests_generator.go | 14 ++++ 6 files changed, 205 insertions(+), 3 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index f02b60a9bb3..3e84a880768 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -120,6 +120,7 @@ var Options struct { LeaderConfig leader.Config ValidationsConfig validations.Config AssistedServiceISOConfig assistedserviceiso.Config + manifestsGeneratorConfig network.Config EnableKubeAPI bool `envconfig:"ENABLE_KUBE_API" default:"false"` InfraEnvConfig controllers.InfraEnvConfig ISOEditorConfig isoeditor.Config @@ -309,7 +310,7 @@ func main() { hostApi := host.NewManager(log.WithField("pkg", "host-state"), db, eventsHandler, hwValidator, instructionApi, &Options.HWValidatorConfig, metricsManager, &Options.HostConfig, lead, operatorsManager) dnsApi := dns.NewDNSHandler(Options.BMConfig.BaseDNSDomains, log) - manifestsGenerator := network.NewManifestsGenerator(manifestsApi) + manifestsGenerator := network.NewManifestsGenerator(manifestsApi, 
Options.manifestsGeneratorConfig) clusterApi := cluster.NewManager(Options.ClusterConfig, log.WithField("pkg", "cluster-state"), db, eventsHandler, hostApi, metricsManager, manifestsGenerator, lead, operatorsManager, ocmClient, objectHandler, dnsApi) bootFilesApi := bootfiles.NewBootFilesAPI(log.WithField("pkg", "bootfiles"), objectHandler) diff --git a/internal/cluster/cluster.go b/internal/cluster/cluster.go index d802b58b10b..ebb40261700 100644 --- a/internal/cluster/cluster.go +++ b/internal/cluster/cluster.go @@ -1059,6 +1059,11 @@ func (m *Manager) GenerateAdditionalManifests(ctx context.Context, cluster *comm return errors.Wrapf(err, "Cluster %s - failed to generate manifests for vmware hosts", cluster.ID.String()) } } + + if err := m.manifestsGeneratorAPI.AddTelemeterManifest(ctx, log, cluster); err != nil { + return errors.Wrap(err, "failed to add telemeter manifest") + } + return nil } diff --git a/internal/cluster/cluster_test.go b/internal/cluster/cluster_test.go index fd60955eb2d..49015566e57 100644 --- a/internal/cluster/cluster_test.go +++ b/internal/cluster/cluster_test.go @@ -2285,6 +2285,7 @@ var _ = Describe("GenerateAdditionalManifests", func() { It("Single node manifests success", func() { manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) manifestsGenerator.EXPECT().AddDnsmasqForSingleNode(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) + manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil) mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1) c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeNone) err := capi.GenerateAdditionalManifests(ctx, &c) @@ -2304,6 +2305,7 @@ var _ = Describe("GenerateAdditionalManifests", func() { cfg2.EnableSingleNodeDnsmasq = false capi = NewManager(cfg2, common.GetTestLog(), db, eventsHandler, nil, mockMetric, manifestsGenerator, nil, mockOperatorMgr, 
nil, nil, nil) manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) + manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil) mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1) c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeNone) err := capi.GenerateAdditionalManifests(ctx, &c) @@ -2317,6 +2319,7 @@ var _ = Describe("GenerateAdditionalManifests", func() { capi = NewManager(cfg2, common.GetTestLog(), db, eventsHandler, nil, mockMetric, manifestsGenerator, nil, mockOperatorMgr, nil, nil, nil) manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) manifestsGenerator.EXPECT().AddDisableVmwareTunnelOffloading(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) + manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil) mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1) c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeFull) @@ -2332,6 +2335,7 @@ var _ = Describe("GenerateAdditionalManifests", func() { capi = NewManager(cfg2, common.GetTestLog(), db, eventsHandler, nil, mockMetric, manifestsGenerator, nil, mockOperatorMgr, nil, nil, nil) manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) manifestsGenerator.EXPECT().AddDisableVmwareTunnelOffloading(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) + manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil) mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1) c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeFull) @@ -2349,6 +2353,7 @@ var _ = Describe("GenerateAdditionalManifests", func() { capi = NewManager(cfg2, common.GetTestLog(), db, eventsHandler, nil, mockMetric, 
manifestsGenerator, nil, mockOperatorMgr, nil, nil, nil) manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) manifestsGenerator.EXPECT().AddDisableVmwareTunnelOffloading(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) + manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil) mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1) c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeFull) @@ -2366,6 +2371,7 @@ var _ = Describe("GenerateAdditionalManifests", func() { cfg2.EnableSingleNodeDnsmasq = false capi = NewManager(cfg2, common.GetTestLog(), db, eventsHandler, nil, mockMetric, manifestsGenerator, nil, mockOperatorMgr, nil, nil, nil) manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) + manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil) mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1) c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeFull) @@ -2377,6 +2383,39 @@ var _ = Describe("GenerateAdditionalManifests", func() { }) }) + Context("Telemeter", func() { + + var ( + telemeterCfg Config + capi API + ) + + BeforeEach(func() { + telemeterCfg = getDefaultConfig() + capi = NewManager(telemeterCfg, common.GetTestLog(), db, eventsHandler, nil, mockMetric, manifestsGenerator, nil, mockOperatorMgr, nil, nil, nil) + }) + + It("Happy flow", func() { + + manifestsGenerator.EXPECT().AddChronyManifest(ctx, gomock.Any(), &c).Return(nil) + mockOperatorMgr.EXPECT().GenerateManifests(ctx, &c).Return(nil) + manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil) + + err := capi.GenerateAdditionalManifests(ctx, &c) + Expect(err).To(Not(HaveOccurred())) + }) + + It("AddTelemeterManifest failed", func() { + + 
manifestsGenerator.EXPECT().AddChronyManifest(ctx, gomock.Any(), &c).Return(nil) + mockOperatorMgr.EXPECT().GenerateManifests(ctx, &c).Return(nil) + manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(errors.New("dummy")) + + err := capi.GenerateAdditionalManifests(ctx, &c) + Expect(err).To(HaveOccurred()) + }) + }) + AfterEach(func() { ctrl.Finish() common.DeleteTestDB(db, dbName) diff --git a/internal/network/manifests_generator.go b/internal/network/manifests_generator.go index de424a520bb..0028747ec7b 100644 --- a/internal/network/manifests_generator.go +++ b/internal/network/manifests_generator.go @@ -24,15 +24,22 @@ type ManifestsGeneratorAPI interface { AddChronyManifest(ctx context.Context, log logrus.FieldLogger, c *common.Cluster) error AddDnsmasqForSingleNode(ctx context.Context, log logrus.FieldLogger, c *common.Cluster) error AddDisableVmwareTunnelOffloading(ctx context.Context, log logrus.FieldLogger, c *common.Cluster) error + AddTelemeterManifest(ctx context.Context, log logrus.FieldLogger, c *common.Cluster) error +} + +type Config struct { + ServiceBaseURL string `envconfig:"SERVICE_BASE_URL"` } type ManifestsGenerator struct { manifestsApi restapi.ManifestsAPI + Config Config } -func NewManifestsGenerator(manifestsApi restapi.ManifestsAPI) *ManifestsGenerator { +func NewManifestsGenerator(manifestsApi restapi.ManifestsAPI, config Config) *ManifestsGenerator { return &ManifestsGenerator{ manifestsApi: manifestsApi, + Config: config, } } @@ -340,3 +347,60 @@ func (m *ManifestsGenerator) AddDisableVmwareTunnelOffloading(ctx context.Contex } return nil } + +const ( + redirectTelemeterStageManifest = ` +apiVersion: v1 +kind: ConfigMap +metadata: + name: cluster-monitoring-config + namespace: openshift-monitoring +data: + config.yaml: | + telemeterClient: + telemeterServerURL: {{.TELEMETER_SERVER_URL}} +` + + prodServiceBaseURL = "https://api.openshift.com" + stageServiceBaseURL = "https://api.stage.openshift.com" + 
stageTelemeterURL = "https://infogw.stage.api.openshift.com" + dummyURL = "https://dummy.com" +) + +// Default Telemeter server is prod. +// In case the cluster is created in stage env we need to redirect to Telemeter-stage +// Note: There is no Telemeter-integration so in this and any other cases we will redirect the metrics to a dummy URL +func (m *ManifestsGenerator) AddTelemeterManifest(ctx context.Context, log logrus.FieldLogger, c *common.Cluster) error { + + manifestParams := map[string]string{} + + if m.Config.ServiceBaseURL == prodServiceBaseURL { + return nil + } + + if m.Config.ServiceBaseURL == stageServiceBaseURL { + + log.Infof("Creating manifest to redirect metrics from installed cluster to telemeter-stage") + manifestParams["TELEMETER_SERVER_URL"] = stageTelemeterURL + + } else { + + log.Infof("Creating manifest to redirect metrics from installed cluster to a dummy URL") + manifestParams["TELEMETER_SERVER_URL"] = dummyURL + + } + + content, err := fillTemplate(manifestParams, redirectTelemeterStageManifest, log) + if err != nil { + log.WithError(err).Error("Failed to parse metrics redirection's template") + return err + } + + if err := m.createManifests(ctx, c, "redirect-telemeter.yaml", content); err != nil { + + log.WithError(err).Error("Failed to create manifest to redirect metrics from installed cluster") + return err + } + + return nil +} diff --git a/internal/network/manifests_generator_test.go b/internal/network/manifests_generator_test.go index 7730ce878d4..0771862ef1f 100644 --- a/internal/network/manifests_generator_test.go +++ b/internal/network/manifests_generator_test.go @@ -123,7 +123,7 @@ var _ = Describe("chrony manifest", func() { log = logrus.New() ctrl = gomock.NewController(GinkgoT()) manifestsApi = mocks.NewMockManifestsAPI(ctrl) - ntpUtils = NewManifestsGenerator(manifestsApi) + ntpUtils = NewManifestsGenerator(manifestsApi, Config{}) db, dbName = common.PrepareTestDB() clusterId = strfmt.UUID(uuid.New().String()) @@ -305,3 
+305,82 @@ var _ = Describe("dnsmasq manifest", func() { }) }) + +var _ = Describe("telemeter manifest", func() { + + var ( + ctx = context.Background() + log *logrus.Logger + ctrl *gomock.Controller + mockManifestsApi *mocks.MockManifestsAPI + manifestsGeneratorApi ManifestsGeneratorAPI + db *gorm.DB + dbName string + clusterId strfmt.UUID + cluster common.Cluster + ) + + BeforeEach(func() { + + log = logrus.New() + ctrl = gomock.NewController(GinkgoT()) + mockManifestsApi = mocks.NewMockManifestsAPI(ctrl) + db, dbName = common.PrepareTestDB() + clusterId = strfmt.UUID(uuid.New().String()) + + cluster = common.Cluster{ + Cluster: models.Cluster{ + ID: &clusterId, + }, + } + Expect(db.Create(&cluster).Error).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + ctrl.Finish() + common.DeleteTestDB(db, dbName) + }) + + for _, test := range []struct { + envName string + serviceBaseURL string + }{ + { + envName: "Prod env", + serviceBaseURL: prodServiceBaseURL, + }, + { + envName: "Stage env", + serviceBaseURL: stageServiceBaseURL, + }, + { + envName: "Other envs", + serviceBaseURL: dummyURL, + }, + } { + test := test + Context(test.envName, func() { + + BeforeEach(func() { + manifestsGeneratorApi = NewManifestsGenerator(mockManifestsApi, Config{ServiceBaseURL: test.serviceBaseURL}) + }) + + It("happy flow", func() { + if test.envName != "Prod env" { + mockManifestsApi.EXPECT().CreateClusterManifest(ctx, gomock.Any()).Return(operations.NewCreateClusterManifestCreated()) + } + err := manifestsGeneratorApi.AddTelemeterManifest(ctx, log, &cluster) + Expect(err).ShouldNot(HaveOccurred()) + }) + + It("AddTelemeterManifest failure", func() { + if test.envName == "Prod env" { + Skip("We don't create any additional manifest in prod") + } + mockManifestsApi.EXPECT().CreateClusterManifest(ctx, gomock.Any()).Return(common.GenerateErrorResponder(errors.Errorf("failed to upload to s3"))) + err := manifestsGeneratorApi.AddTelemeterManifest(ctx, log, &cluster) + 
Expect(err).Should(HaveOccurred()) + }) + }) + } +}) diff --git a/internal/network/mock_manifests_generator.go b/internal/network/mock_manifests_generator.go index ac11211968a..e88b9b1a5af 100644 --- a/internal/network/mock_manifests_generator.go +++ b/internal/network/mock_manifests_generator.go @@ -76,3 +76,17 @@ func (mr *MockManifestsGeneratorAPIMockRecorder) AddDisableVmwareTunnelOffloadin mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddDisableVmwareTunnelOffloading", reflect.TypeOf((*MockManifestsGeneratorAPI)(nil).AddDisableVmwareTunnelOffloading), ctx, log, c) } + +// AddTelemeterManifest mocks base method +func (m *MockManifestsGeneratorAPI) AddTelemeterManifest(ctx context.Context, log logrus.FieldLogger, c *common.Cluster) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AddTelemeterManifest", ctx, log, c) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddTelemeterManifest indicates an expected call of AddTelemeterManifest +func (mr *MockManifestsGeneratorAPIMockRecorder) AddTelemeterManifest(ctx, log, c interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddTelemeterManifest", reflect.TypeOf((*MockManifestsGeneratorAPI)(nil).AddTelemeterManifest), ctx, log, c) +}