From 8dfec4894c77dcda435d0905fd246106cf0dfaeb Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Wed, 30 Nov 2022 13:13:49 +0100 Subject: [PATCH 1/4] Completely ignore Instance Metadata when in SQS Queue mode. --- cmd/node-termination-handler.go | 27 +++++++++-------- pkg/ec2metadata/ec2metadata.go | 53 ++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/cmd/node-termination-handler.go b/cmd/node-termination-handler.go index 09b45ed7..3c441a45 100644 --- a/cmd/node-termination-handler.go +++ b/cmd/node-termination-handler.go @@ -113,11 +113,12 @@ func main() { nthConfig.Print() log.Fatal().Err(err).Msg("Unable to instantiate probes service,") } + imdsDisabled := nthConfig.EnableSQSTerminationDraining imds := ec2metadata.New(nthConfig.MetadataURL, nthConfig.MetadataTries) interruptionEventStore := interruptioneventstore.New(nthConfig) - nodeMetadata := imds.GetNodeMetadata() + nodeMetadata := imds.GetNodeMetadata(!imdsDisabled) // Populate the aws region if available from node metadata and not already explicitly configured if nthConfig.AWSRegion == "" && nodeMetadata.Region != "" { nthConfig.AWSRegion = nodeMetadata.Region @@ -163,17 +164,19 @@ func main() { defer close(cancelChan) monitoringFns := map[string]monitor.Monitor{} - if nthConfig.EnableSpotInterruptionDraining { - imdsSpotMonitor := spotitn.NewSpotInterruptionMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName) - monitoringFns[spotITN] = imdsSpotMonitor - } - if nthConfig.EnableScheduledEventDraining { - imdsScheduledEventMonitor := scheduledevent.NewScheduledEventMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName) - monitoringFns[scheduledMaintenance] = imdsScheduledEventMonitor - } - if nthConfig.EnableRebalanceMonitoring || nthConfig.EnableRebalanceDraining { - imdsRebalanceMonitor := rebalancerecommendation.NewRebalanceRecommendationMonitor(imds, interruptionChan, nthConfig.NodeName) - monitoringFns[rebalanceRecommendation] = imdsRebalanceMonitor + if !imdsDisabled { + if nthConfig.EnableSpotInterruptionDraining { + imdsSpotMonitor := spotitn.NewSpotInterruptionMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName) + monitoringFns[spotITN] = imdsSpotMonitor + } + if nthConfig.EnableScheduledEventDraining { + imdsScheduledEventMonitor := scheduledevent.NewScheduledEventMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName) + monitoringFns[scheduledMaintenance] = imdsScheduledEventMonitor + } + if nthConfig.EnableRebalanceMonitoring || nthConfig.EnableRebalanceDraining { + imdsRebalanceMonitor := rebalancerecommendation.NewRebalanceRecommendationMonitor(imds, interruptionChan, nthConfig.NodeName) + monitoringFns[rebalanceRecommendation] = imdsRebalanceMonitor + } } if nthConfig.EnableSQSTerminationDraining { cfg := aws.NewConfig().WithRegion(nthConfig.AWSRegion).WithEndpoint(nthConfig.AWSEndpoint).WithSTSRegionalEndpoint(endpoints.RegionalSTSEndpoint) diff --git a/pkg/ec2metadata/ec2metadata.go b/pkg/ec2metadata/ec2metadata.go index bb8767d7..76592cf0 100644 --- a/pkg/ec2metadata/ec2metadata.go +++ b/pkg/ec2metadata/ec2metadata.go @@ -325,30 +325,43 @@ func retry(attempts int, sleep time.Duration, httpReq func() (*http.Response, er } // GetNodeMetadata attempts to gather additional ec2 instance information from the metadata service -func (e *Service) GetNodeMetadata() NodeMetadata { +func (e *Service) GetNodeMetadata(imdsDisabled bool) NodeMetadata { var metadata NodeMetadata - identityDoc, err := e.GetMetadataInfo(IdentityDocPath) - if err != nil { - log.Err(err).Msg("Unable to fetch metadata from IMDS") - return metadata - } - err = json.NewDecoder(strings.NewReader(identityDoc)).Decode(&metadata) - if err != nil { - log.Warn().Msg("Unable to fetch instance identity document from ec2 metadata") - metadata.InstanceID, _ = e.GetMetadataInfo(InstanceIDPath) - metadata.InstanceType, _ = e.GetMetadataInfo(InstanceTypePath) - metadata.LocalIP, _ = e.GetMetadataInfo(LocalIPPath) - metadata.AvailabilityZone, _ = e.GetMetadataInfo(AZPlacementPath) + if (!imdsDisabled) { + identityDoc, err := e.GetMetadataInfo(IdentityDocPath) + if err != nil { + log.Err(err).Msg("Unable to fetch metadata from IMDS") + return metadata + } + err = json.NewDecoder(strings.NewReader(identityDoc)).Decode(&metadata) + if err != nil { + log.Warn().Msg("Unable to fetch instance identity document from ec2 metadata") + metadata.InstanceID, _ = e.GetMetadataInfo(InstanceIDPath) + metadata.InstanceType, _ = e.GetMetadataInfo(InstanceTypePath) + metadata.LocalIP, _ = e.GetMetadataInfo(LocalIPPath) + metadata.AvailabilityZone, _ = e.GetMetadataInfo(AZPlacementPath) if len(metadata.AvailabilityZone) > 1 { - metadata.Region = metadata.AvailabilityZone[0 : len(metadata.AvailabilityZone)-1] + metadata.Region = metadata.AvailabilityZone[0 : len(metadata.AvailabilityZone)-1] } - } - metadata.InstanceLifeCycle, _ = e.GetMetadataInfo(InstanceLifeCycle) - metadata.LocalHostname, _ = e.GetMetadataInfo(LocalHostnamePath) - metadata.PublicHostname, _ = e.GetMetadataInfo(PublicHostnamePath) - metadata.PublicIP, _ = e.GetMetadataInfo(PublicIPPath) + } + metadata.InstanceLifeCycle, _ = e.GetMetadataInfo(InstanceLifeCycle) + metadata.LocalHostname, _ = e.GetMetadataInfo(LocalHostnamePath) + metadata.PublicHostname, _ = e.GetMetadataInfo(PublicHostnamePath) + metadata.PublicIP, _ = e.GetMetadataInfo(PublicIPPath) - log.Info().Interface("metadata", metadata).Msg("Startup Metadata Retrieved") + log.Info().Interface("metadata", metadata).Msg("Startup Metadata Retrieved") + } else { + metadata.AccountId = "" + metadata.InstanceID = "" + metadata.InstanceLifeCycle = "" + metadata.InstanceType = "" + metadata.PublicHostname = "" + metadata.PublicIP = "" + metadata.LocalHostname = "" + metadata.LocalIP = "" + metadata.AvailabilityZone = "" + metadata.Region = "" + } return metadata } From de95f476ff1b7c1b8866f3afdccb0f576da2783a Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Sun, 4 Dec 2022 19:48:35 +0100 Subject: [PATCH 2/4] Function parameter typo fix. --- cmd/node-termination-handler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/node-termination-handler.go b/cmd/node-termination-handler.go index 3c441a45..aaa25ac2 100644 --- a/cmd/node-termination-handler.go +++ b/cmd/node-termination-handler.go @@ -118,7 +118,7 @@ func main() { imds := ec2metadata.New(nthConfig.MetadataURL, nthConfig.MetadataTries) interruptionEventStore := interruptioneventstore.New(nthConfig) - nodeMetadata := imds.GetNodeMetadata(!imdsDisabled) + nodeMetadata := imds.GetNodeMetadata(imdsDisabled) // Populate the aws region if available from node metadata and not already explicitly configured if nthConfig.AWSRegion == "" && nodeMetadata.Region != "" { nthConfig.AWSRegion = nodeMetadata.Region From 3850aa667cddb9a7e6f2ecee0b6d30e851b5bc9e Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Sun, 4 Dec 2022 20:13:05 +0100 Subject: [PATCH 3/4] Fix imds.GetNodeMetadata() test. --- pkg/ec2metadata/ec2metadata_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ec2metadata/ec2metadata_test.go b/pkg/ec2metadata/ec2metadata_test.go index eb57e8d9..60d00fdf 100644 --- a/pkg/ec2metadata/ec2metadata_test.go +++ b/pkg/ec2metadata/ec2metadata_test.go @@ -580,7 +580,7 @@ func TestGetNodeMetadata(t *testing.T) { // Use URL from our local test server imds := ec2metadata.New(server.URL, 1) - nodeMetadata := imds.GetNodeMetadata() + nodeMetadata := imds.GetNodeMetadata(false) h.Assert(t, nodeMetadata.InstanceID == `metadata`, `Missing required NodeMetadata field InstanceID`) h.Assert(t, nodeMetadata.InstanceType == `metadata`, `Missing required NodeMetadata field InstanceType`) From 34ead36c2dc23b5731c65b229ee906d703ff4dfe Mon Sep 17 00:00:00 2001 From: Steve Nay <265958+snay2@users.noreply.github.com> Date: Mon, 5 Dec 2022 16:14:31 -0600 Subject: [PATCH 4/4] Add unit test when IMDS mode is disabled Update README to reflect new behavior. Fix NodeMetadata initialization logic. --- README.md | 2 +- pkg/ec2metadata/ec2metadata.go | 57 ++++++++++++----------------- pkg/ec2metadata/ec2metadata_test.go | 25 +++++++++++++ 3 files changed, 49 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 2442cb80..f2acb0f7 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ The `enableSqsTerminationDraining` must be set to false for these configuration The Queue Processor Mode does not allow for fine-grained configuration of which events are handled through helm configuration keys. Instead, you can modify your Amazon EventBridge rules to not send certain types of events to the SQS Queue so that NTH does not process those events. All events when operating in Queue Processor mode are Cordoned and Drained unless the `cordon-only` flag is set to true. -The `enableSqsTerminationDraining` flag turns on Queue Processor Mode. When Queue Processor Mode is enabled, IMDS mode cannot be active. NTH cannot respond to queue events AND monitor IMDS paths. Queue Processor Mode still queries for node information on startup, but this information is not required for normal operation, so it is safe to disable IMDS for the NTH pod. +The `enableSqsTerminationDraining` flag turns on Queue Processor Mode. When Queue Processor Mode is enabled, IMDS mode will be disabled, even if you explicitly enabled any of the IMDS configuration keys. NTH cannot respond to queue events AND monitor IMDS paths. In this case, it is safe to disable IMDS for the NTH pod.
AWS Node Termination Handler - IMDS Processor diff --git a/pkg/ec2metadata/ec2metadata.go b/pkg/ec2metadata/ec2metadata.go index 76592cf0..1f17e9c1 100644 --- a/pkg/ec2metadata/ec2metadata.go +++ b/pkg/ec2metadata/ec2metadata.go @@ -326,42 +326,31 @@ func retry(attempts int, sleep time.Duration, httpReq func() (*http.Response, er // GetNodeMetadata attempts to gather additional ec2 instance information from the metadata service func (e *Service) GetNodeMetadata(imdsDisabled bool) NodeMetadata { - var metadata NodeMetadata - if (!imdsDisabled) { - identityDoc, err := e.GetMetadataInfo(IdentityDocPath) - if err != nil { - log.Err(err).Msg("Unable to fetch metadata from IMDS") - return metadata - } - err = json.NewDecoder(strings.NewReader(identityDoc)).Decode(&metadata) - if err != nil { - log.Warn().Msg("Unable to fetch instance identity document from ec2 metadata") - metadata.InstanceID, _ = e.GetMetadataInfo(InstanceIDPath) - metadata.InstanceType, _ = e.GetMetadataInfo(InstanceTypePath) - metadata.LocalIP, _ = e.GetMetadataInfo(LocalIPPath) - metadata.AvailabilityZone, _ = e.GetMetadataInfo(AZPlacementPath) - if len(metadata.AvailabilityZone) > 1 { - metadata.Region = metadata.AvailabilityZone[0 : len(metadata.AvailabilityZone)-1] + metadata := NodeMetadata{} + if !imdsDisabled { + identityDoc, err := e.GetMetadataInfo(IdentityDocPath) + if err != nil { + log.Err(err).Msg("Unable to fetch metadata from IMDS") + return metadata } - } - metadata.InstanceLifeCycle, _ = e.GetMetadataInfo(InstanceLifeCycle) - metadata.LocalHostname, _ = e.GetMetadataInfo(LocalHostnamePath) - metadata.PublicHostname, _ = e.GetMetadataInfo(PublicHostnamePath) - metadata.PublicIP, _ = e.GetMetadataInfo(PublicIPPath) + err = json.NewDecoder(strings.NewReader(identityDoc)).Decode(&metadata) + if err != nil { + log.Warn().Msg("Unable to fetch instance identity document from ec2 metadata") + metadata.InstanceID, _ = e.GetMetadataInfo(InstanceIDPath) + metadata.InstanceType, _ = e.GetMetadataInfo(InstanceTypePath) + metadata.LocalIP, _ = e.GetMetadataInfo(LocalIPPath) + metadata.AvailabilityZone, _ = e.GetMetadataInfo(AZPlacementPath) + if len(metadata.AvailabilityZone) > 1 { + metadata.Region = metadata.AvailabilityZone[0 : len(metadata.AvailabilityZone)-1] + } + } + metadata.InstanceLifeCycle, _ = e.GetMetadataInfo(InstanceLifeCycle) + metadata.LocalHostname, _ = e.GetMetadataInfo(LocalHostnamePath) + metadata.PublicHostname, _ = e.GetMetadataInfo(PublicHostnamePath) + metadata.PublicIP, _ = e.GetMetadataInfo(PublicIPPath) - log.Info().Interface("metadata", metadata).Msg("Startup Metadata Retrieved") - } else { - metadata.AccountId = "" - metadata.InstanceID = "" - metadata.InstanceLifeCycle = "" - metadata.InstanceType = "" - metadata.PublicHostname = "" - metadata.PublicIP = "" - metadata.LocalHostname = "" - metadata.LocalIP = "" - metadata.AvailabilityZone = "" - metadata.Region = "" - } + log.Info().Interface("metadata", metadata).Msg("Startup Metadata Retrieved") + } return metadata } diff --git a/pkg/ec2metadata/ec2metadata_test.go b/pkg/ec2metadata/ec2metadata_test.go index 60d00fdf..fdeb2f58 100644 --- a/pkg/ec2metadata/ec2metadata_test.go +++ b/pkg/ec2metadata/ec2metadata_test.go @@ -582,11 +582,36 @@ func TestGetNodeMetadata(t *testing.T) { imds := ec2metadata.New(server.URL, 1) nodeMetadata := imds.GetNodeMetadata(false) + h.Assert(t, nodeMetadata.AccountId == "", `AccountId should be empty string (only present in SQS events)`) h.Assert(t, nodeMetadata.InstanceID == `metadata`, `Missing required NodeMetadata field InstanceID`) + h.Assert(t, nodeMetadata.InstanceLifeCycle == `metadata`, `Missing required NodeMetadata field InstanceLifeCycle`) h.Assert(t, nodeMetadata.InstanceType == `metadata`, `Missing required NodeMetadata field InstanceType`) h.Assert(t, nodeMetadata.LocalHostname == `metadata`, `Missing required NodeMetadata field LocalHostname`) h.Assert(t, nodeMetadata.LocalIP == `metadata`, `Missing required NodeMetadata field LocalIP`) h.Assert(t, nodeMetadata.PublicHostname == `metadata`, `Missing required NodeMetadata field PublicHostname`) h.Assert(t, nodeMetadata.PublicIP == `metadata`, `Missing required NodeMetadata field PublicIP`) h.Assert(t, nodeMetadata.AvailabilityZone == `metadata`, `Missing required NodeMetadata field AvailabilityZone`) + h.Assert(t, nodeMetadata.Region == `metadat`, `Region should equal AvailabilityZone with the final character truncated`) +} + +func TestGetNodeMetadataWithIMDSDisabled(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + h.Ok(t, fmt.Errorf("IMDS was called when using Queue Processor mode")) + })) + defer server.Close() + + // Use URL from our local test server that throws errors when called + imds := ec2metadata.New(server.URL, 1) + nodeMetadata := imds.GetNodeMetadata(true) + + h.Assert(t, nodeMetadata.AccountId == "", "AccountId should be empty string") + h.Assert(t, nodeMetadata.InstanceID == "", "InstanceID should be empty string") + h.Assert(t, nodeMetadata.InstanceLifeCycle == "", "InstanceLifeCycle should be empty string") + h.Assert(t, nodeMetadata.InstanceType == "", "InstanceType should be empty string") + h.Assert(t, nodeMetadata.PublicHostname == "", "PublicHostname should be empty string") + h.Assert(t, nodeMetadata.PublicIP == "", "PublicIP should be empty string") + h.Assert(t, nodeMetadata.LocalHostname == "", "LocalHostname should be empty string") + h.Assert(t, nodeMetadata.LocalIP == "", "LocalIP should be empty string") + h.Assert(t, nodeMetadata.AvailabilityZone == "", "AvailabilityZone should be empty string") + h.Assert(t, nodeMetadata.Region == "", "Region should be empty string") }