Skip to content

Commit

Permalink
MGMT-6437: Redirecting cluster metrics to the correct Telemeter serve…
Browse files Browse the repository at this point in the history
…r. (#1773)

Currently all clusters send metrics to the default Telemeter server
which is the prod instance of Telemeter in the cloud.

This is wrong for 2 reasons:

1. Statistics from integration/stage clusters shouldn't be part of
   production dashboards.

2. The integration/stage service is configured to work against the integration/stage
   AMS; therefore, when metrics from those clusters reach prod
   Telemeter, they fail authN anyway.

This change makes sure each env sends its metrics to the correct server, or
has its metrics reporting disabled.

Signed-off-by: Yoni Bettan <[email protected]>
  • Loading branch information
ybettan authored May 25, 2021
1 parent 90de569 commit af46cb3
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 3 deletions.
3 changes: 2 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ var Options struct {
LeaderConfig leader.Config
ValidationsConfig validations.Config
AssistedServiceISOConfig assistedserviceiso.Config
manifestsGeneratorConfig network.Config
EnableKubeAPI bool `envconfig:"ENABLE_KUBE_API" default:"false"`
InfraEnvConfig controllers.InfraEnvConfig
ISOEditorConfig isoeditor.Config
Expand Down Expand Up @@ -309,7 +310,7 @@ func main() {
hostApi := host.NewManager(log.WithField("pkg", "host-state"), db, eventsHandler, hwValidator,
instructionApi, &Options.HWValidatorConfig, metricsManager, &Options.HostConfig, lead, operatorsManager)
dnsApi := dns.NewDNSHandler(Options.BMConfig.BaseDNSDomains, log)
manifestsGenerator := network.NewManifestsGenerator(manifestsApi)
manifestsGenerator := network.NewManifestsGenerator(manifestsApi, Options.manifestsGeneratorConfig)
clusterApi := cluster.NewManager(Options.ClusterConfig, log.WithField("pkg", "cluster-state"), db,
eventsHandler, hostApi, metricsManager, manifestsGenerator, lead, operatorsManager, ocmClient, objectHandler, dnsApi)
bootFilesApi := bootfiles.NewBootFilesAPI(log.WithField("pkg", "bootfiles"), objectHandler)
Expand Down
5 changes: 5 additions & 0 deletions internal/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,11 @@ func (m *Manager) GenerateAdditionalManifests(ctx context.Context, cluster *comm
return errors.Wrapf(err, "Cluster %s - failed to generate manifests for vmware hosts", cluster.ID.String())
}
}

if err := m.manifestsGeneratorAPI.AddTelemeterManifest(ctx, log, cluster); err != nil {
return errors.Wrap(err, "failed to add telemeter manifest")
}

return nil
}

Expand Down
39 changes: 39 additions & 0 deletions internal/cluster/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2285,6 +2285,7 @@ var _ = Describe("GenerateAdditionalManifests", func() {
It("Single node manifests success", func() {
manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
manifestsGenerator.EXPECT().AddDnsmasqForSingleNode(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil)
mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1)
c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeNone)
err := capi.GenerateAdditionalManifests(ctx, &c)
Expand All @@ -2304,6 +2305,7 @@ var _ = Describe("GenerateAdditionalManifests", func() {
cfg2.EnableSingleNodeDnsmasq = false
capi = NewManager(cfg2, common.GetTestLog(), db, eventsHandler, nil, mockMetric, manifestsGenerator, nil, mockOperatorMgr, nil, nil, nil)
manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil)
mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1)
c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeNone)
err := capi.GenerateAdditionalManifests(ctx, &c)
Expand All @@ -2317,6 +2319,7 @@ var _ = Describe("GenerateAdditionalManifests", func() {
capi = NewManager(cfg2, common.GetTestLog(), db, eventsHandler, nil, mockMetric, manifestsGenerator, nil, mockOperatorMgr, nil, nil, nil)
manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
manifestsGenerator.EXPECT().AddDisableVmwareTunnelOffloading(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil)
mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1)
c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeFull)

Expand All @@ -2332,6 +2335,7 @@ var _ = Describe("GenerateAdditionalManifests", func() {
capi = NewManager(cfg2, common.GetTestLog(), db, eventsHandler, nil, mockMetric, manifestsGenerator, nil, mockOperatorMgr, nil, nil, nil)
manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
manifestsGenerator.EXPECT().AddDisableVmwareTunnelOffloading(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil)
mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1)
c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeFull)

Expand All @@ -2349,6 +2353,7 @@ var _ = Describe("GenerateAdditionalManifests", func() {
capi = NewManager(cfg2, common.GetTestLog(), db, eventsHandler, nil, mockMetric, manifestsGenerator, nil, mockOperatorMgr, nil, nil, nil)
manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
manifestsGenerator.EXPECT().AddDisableVmwareTunnelOffloading(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil)
mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1)
c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeFull)

Expand All @@ -2366,6 +2371,7 @@ var _ = Describe("GenerateAdditionalManifests", func() {
cfg2.EnableSingleNodeDnsmasq = false
capi = NewManager(cfg2, common.GetTestLog(), db, eventsHandler, nil, mockMetric, manifestsGenerator, nil, mockOperatorMgr, nil, nil, nil)
manifestsGenerator.EXPECT().AddChronyManifest(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil)
mockOperatorMgr.EXPECT().GenerateManifests(gomock.Any(), gomock.Any()).Return(nil).Times(1)
c.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeFull)

Expand All @@ -2377,6 +2383,39 @@ var _ = Describe("GenerateAdditionalManifests", func() {
})
})

Context("Telemeter", func() {

	var capi API

	// expectNonTelemeterManifests registers the chrony and operator
	// expectations that both cases below set before the telemeter one.
	expectNonTelemeterManifests := func() {
		manifestsGenerator.EXPECT().AddChronyManifest(ctx, gomock.Any(), &c).Return(nil)
		mockOperatorMgr.EXPECT().GenerateManifests(ctx, &c).Return(nil)
	}

	BeforeEach(func() {
		// Every case runs against a manager built from the default configuration.
		capi = NewManager(getDefaultConfig(), common.GetTestLog(), db, eventsHandler, nil, mockMetric, manifestsGenerator, nil, mockOperatorMgr, nil, nil, nil)
	})

	It("Happy flow", func() {
		expectNonTelemeterManifests()
		manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(nil)

		Expect(capi.GenerateAdditionalManifests(ctx, &c)).NotTo(HaveOccurred())
	})

	It("AddTelemeterManifest failed", func() {
		expectNonTelemeterManifests()
		// An error from the telemeter step must be reported by GenerateAdditionalManifests.
		manifestsGenerator.EXPECT().AddTelemeterManifest(ctx, gomock.Any(), &c).Return(errors.New("dummy"))

		Expect(capi.GenerateAdditionalManifests(ctx, &c)).To(HaveOccurred())
	})
})

AfterEach(func() {
ctrl.Finish()
common.DeleteTestDB(db, dbName)
Expand Down
66 changes: 65 additions & 1 deletion internal/network/manifests_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,22 @@ type ManifestsGeneratorAPI interface {
AddChronyManifest(ctx context.Context, log logrus.FieldLogger, c *common.Cluster) error
AddDnsmasqForSingleNode(ctx context.Context, log logrus.FieldLogger, c *common.Cluster) error
AddDisableVmwareTunnelOffloading(ctx context.Context, log logrus.FieldLogger, c *common.Cluster) error
AddTelemeterManifest(ctx context.Context, log logrus.FieldLogger, c *common.Cluster) error
}

type (
	// Config carries the settings of the manifests generator.
	Config struct {
		// ServiceBaseURL is populated from the SERVICE_BASE_URL environment
		// variable (see the envconfig tag).
		ServiceBaseURL string `envconfig:"SERVICE_BASE_URL"`
	}

	// ManifestsGenerator holds the manifests API handle and the configuration
	// it was constructed with.
	ManifestsGenerator struct {
		manifestsApi restapi.ManifestsAPI
		Config       Config
	}
)

func NewManifestsGenerator(manifestsApi restapi.ManifestsAPI) *ManifestsGenerator {
// NewManifestsGenerator returns a ManifestsGenerator that stores the given
// manifests API handle and configuration.
func NewManifestsGenerator(manifestsApi restapi.ManifestsAPI, config Config) *ManifestsGenerator {
	generator := new(ManifestsGenerator)
	generator.manifestsApi = manifestsApi
	generator.Config = config
	return generator
}

Expand Down Expand Up @@ -340,3 +347,60 @@ func (m *ManifestsGenerator) AddDisableVmwareTunnelOffloading(ctx context.Contex
}
return nil
}

const (
	// redirectTelemeterStageManifest is the template of the
	// cluster-monitoring-config ConfigMap that overrides the Telemeter server
	// URL of an installed cluster. TELEMETER_SERVER_URL is substituted when
	// the template is filled. The YAML nesting is significant: name and
	// namespace belong under metadata, and the telemeterClient settings form
	// the body of the config.yaml block scalar.
	redirectTelemeterStageManifest = `
apiVersion: v1
kind: ConfigMap
metadata:
  name: cluster-monitoring-config
  namespace: openshift-monitoring
data:
  config.yaml: |
    telemeterClient:
      telemeterServerURL: {{.TELEMETER_SERVER_URL}}
`

	// prodServiceBaseURL and stageServiceBaseURL are the service base URLs
	// that AddTelemeterManifest compares Config.ServiceBaseURL against.
	prodServiceBaseURL  = "https://api.openshift.com"
	stageServiceBaseURL = "https://api.stage.openshift.com"

	// stageTelemeterURL is the Telemeter server used for stage clusters.
	stageTelemeterURL = "https://infogw.stage.api.openshift.com"

	// dummyURL is the Telemeter server URL given to clusters of any other
	// environment.
	// NOTE(review): dummy.com may be a registered third-party domain; consider
	// a reserved name (RFC 2606) so metrics are never sent to an external
	// party — confirm before changing.
	dummyURL = "https://dummy.com"
)

// AddTelemeterManifest creates the "redirect-telemeter.yaml" manifest that
// overrides the Telemeter server an installed cluster sends its metrics to.
//
// The default Telemeter server is prod, so no manifest is created when the
// service base URL is the prod one. When the service base URL is the stage
// one, metrics are redirected to Telemeter-stage. There is no
// Telemeter-integration, so for integration and any other environment the
// metrics are redirected to a dummy URL.
//
// It returns the error from filling the template or from creating the
// manifest, and nil otherwise.
func (m *ManifestsGenerator) AddTelemeterManifest(ctx context.Context, log logrus.FieldLogger, c *common.Cluster) error {
	var telemeterServerURL string

	switch m.Config.ServiceBaseURL {
	case prodServiceBaseURL:
		// Prod clusters keep the default Telemeter server: nothing to add.
		return nil
	case stageServiceBaseURL:
		log.Infof("Creating manifest to redirect metrics from installed cluster to telemeter-stage")
		telemeterServerURL = stageTelemeterURL
	default:
		log.Infof("Creating manifest to redirect metrics from installed cluster to a dummy URL")
		telemeterServerURL = dummyURL
	}

	templateParams := map[string]string{"TELEMETER_SERVER_URL": telemeterServerURL}

	content, err := fillTemplate(templateParams, redirectTelemeterStageManifest, log)
	if err != nil {
		log.WithError(err).Error("Failed to parse metrics redirection's template")
		return err
	}

	err = m.createManifests(ctx, c, "redirect-telemeter.yaml", content)
	if err != nil {
		log.WithError(err).Error("Failed to create manifest to redirect metrics from installed cluster")
		return err
	}

	return nil
}
81 changes: 80 additions & 1 deletion internal/network/manifests_generator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ var _ = Describe("chrony manifest", func() {
log = logrus.New()
ctrl = gomock.NewController(GinkgoT())
manifestsApi = mocks.NewMockManifestsAPI(ctrl)
ntpUtils = NewManifestsGenerator(manifestsApi)
ntpUtils = NewManifestsGenerator(manifestsApi, Config{})
db, dbName = common.PrepareTestDB()
clusterId = strfmt.UUID(uuid.New().String())

Expand Down Expand Up @@ -305,3 +305,82 @@ var _ = Describe("dnsmasq manifest", func() {
})

})

var _ = Describe("telemeter manifest", func() {

	var (
		ctx                   = context.Background()
		log                   *logrus.Logger
		ctrl                  *gomock.Controller
		mockManifestsApi      *mocks.MockManifestsAPI
		manifestsGeneratorApi ManifestsGeneratorAPI
		db                    *gorm.DB
		dbName                string
		clusterId             strfmt.UUID
		cluster               common.Cluster
	)

	BeforeEach(func() {
		log = logrus.New()
		ctrl = gomock.NewController(GinkgoT())
		mockManifestsApi = mocks.NewMockManifestsAPI(ctrl)
		db, dbName = common.PrepareTestDB()
		clusterId = strfmt.UUID(uuid.New().String())

		// Persist a minimal cluster record carrying only a fresh ID.
		cluster = common.Cluster{Cluster: models.Cluster{ID: &clusterId}}
		Expect(db.Create(&cluster).Error).NotTo(HaveOccurred())
	})

	AfterEach(func() {
		ctrl.Finish()
		common.DeleteTestDB(db, dbName)
	})

	// envCase describes one environment the generator may be configured for.
	type envCase struct {
		envName        string
		serviceBaseURL string
		// createsManifest is false only for prod, where no manifest is uploaded.
		createsManifest bool
	}

	envCases := []envCase{
		{envName: "Prod env", serviceBaseURL: prodServiceBaseURL, createsManifest: false},
		{envName: "Stage env", serviceBaseURL: stageServiceBaseURL, createsManifest: true},
		{envName: "Other envs", serviceBaseURL: dummyURL, createsManifest: true},
	}

	for _, tc := range envCases {
		tc := tc // capture the per-iteration value for the closures below
		Context(tc.envName, func() {

			BeforeEach(func() {
				manifestsGeneratorApi = NewManifestsGenerator(mockManifestsApi, Config{ServiceBaseURL: tc.serviceBaseURL})
			})

			It("happy flow", func() {
				if tc.createsManifest {
					mockManifestsApi.EXPECT().CreateClusterManifest(ctx, gomock.Any()).Return(operations.NewCreateClusterManifestCreated())
				}
				Expect(manifestsGeneratorApi.AddTelemeterManifest(ctx, log, &cluster)).ShouldNot(HaveOccurred())
			})

			It("AddTelemeterManifest failure", func() {
				if !tc.createsManifest {
					Skip("We don't create any additional manifest in prod")
				}
				mockManifestsApi.EXPECT().CreateClusterManifest(ctx, gomock.Any()).Return(common.GenerateErrorResponder(errors.Errorf("failed to upload to s3")))
				Expect(manifestsGeneratorApi.AddTelemeterManifest(ctx, log, &cluster)).Should(HaveOccurred())
			})
		})
	}
})
14 changes: 14 additions & 0 deletions internal/network/mock_manifests_generator.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit af46cb3

Please sign in to comment.