Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Completely ignore Instance Metadata when in SQS Queue mode. #735

Merged
merged 5 commits into from
Dec 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ The `enableSqsTerminationDraining` must be set to false for these configuration
The Queue Processor Mode does not allow for fine-grained configuration of which events are handled through helm configuration keys. Instead, you can modify your Amazon EventBridge rules to not send certain types of events to the SQS Queue so that NTH does not process those events. All events when operating in Queue Processor mode are Cordoned and Drained unless the `cordon-only` flag is set to true.


The `enableSqsTerminationDraining` flag turns on Queue Processor Mode. When Queue Processor Mode is enabled, IMDS mode cannot be active. NTH cannot respond to queue events AND monitor IMDS paths. Queue Processor Mode still queries for node information on startup, but this information is not required for normal operation, so it is safe to disable IMDS for the NTH pod.
The `enableSqsTerminationDraining` flag turns on Queue Processor Mode. When Queue Processor Mode is enabled, IMDS mode is disabled, even if you have explicitly enabled any of the IMDS configuration keys; NTH cannot both respond to queue events and monitor IMDS paths. Because NTH does not query IMDS in Queue Processor Mode, it is safe to disable IMDS for the NTH pod.

<details open>
<summary>AWS Node Termination Handler - IMDS Processor</summary>
Expand Down
27 changes: 15 additions & 12 deletions cmd/node-termination-handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,12 @@ func main() {
nthConfig.Print()
log.Fatal().Err(err).Msg("Unable to instantiate probes service,")
}
imdsDisabled := nthConfig.EnableSQSTerminationDraining

imds := ec2metadata.New(nthConfig.MetadataURL, nthConfig.MetadataTries)

interruptionEventStore := interruptioneventstore.New(nthConfig)
nodeMetadata := imds.GetNodeMetadata()
nodeMetadata := imds.GetNodeMetadata(imdsDisabled)
// Populate the aws region if available from node metadata and not already explicitly configured
if nthConfig.AWSRegion == "" && nodeMetadata.Region != "" {
nthConfig.AWSRegion = nodeMetadata.Region
Expand Down Expand Up @@ -163,17 +164,19 @@ func main() {
defer close(cancelChan)

monitoringFns := map[string]monitor.Monitor{}
if nthConfig.EnableSpotInterruptionDraining {
imdsSpotMonitor := spotitn.NewSpotInterruptionMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName)
monitoringFns[spotITN] = imdsSpotMonitor
}
if nthConfig.EnableScheduledEventDraining {
imdsScheduledEventMonitor := scheduledevent.NewScheduledEventMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName)
monitoringFns[scheduledMaintenance] = imdsScheduledEventMonitor
}
if nthConfig.EnableRebalanceMonitoring || nthConfig.EnableRebalanceDraining {
imdsRebalanceMonitor := rebalancerecommendation.NewRebalanceRecommendationMonitor(imds, interruptionChan, nthConfig.NodeName)
monitoringFns[rebalanceRecommendation] = imdsRebalanceMonitor
if !imdsDisabled {
if nthConfig.EnableSpotInterruptionDraining {
imdsSpotMonitor := spotitn.NewSpotInterruptionMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName)
monitoringFns[spotITN] = imdsSpotMonitor
}
if nthConfig.EnableScheduledEventDraining {
imdsScheduledEventMonitor := scheduledevent.NewScheduledEventMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName)
monitoringFns[scheduledMaintenance] = imdsScheduledEventMonitor
}
if nthConfig.EnableRebalanceMonitoring || nthConfig.EnableRebalanceDraining {
imdsRebalanceMonitor := rebalancerecommendation.NewRebalanceRecommendationMonitor(imds, interruptionChan, nthConfig.NodeName)
monitoringFns[rebalanceRecommendation] = imdsRebalanceMonitor
}
}
if nthConfig.EnableSQSTerminationDraining {
cfg := aws.NewConfig().WithRegion(nthConfig.AWSRegion).WithEndpoint(nthConfig.AWSEndpoint).WithSTSRegionalEndpoint(endpoints.RegionalSTSEndpoint)
Expand Down
46 changes: 24 additions & 22 deletions pkg/ec2metadata/ec2metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,30 +325,32 @@ func retry(attempts int, sleep time.Duration, httpReq func() (*http.Response, er
}

// GetNodeMetadata attempts to gather additional ec2 instance information from the metadata service
func (e *Service) GetNodeMetadata() NodeMetadata {
var metadata NodeMetadata
identityDoc, err := e.GetMetadataInfo(IdentityDocPath)
if err != nil {
log.Err(err).Msg("Unable to fetch metadata from IMDS")
return metadata
}
err = json.NewDecoder(strings.NewReader(identityDoc)).Decode(&metadata)
if err != nil {
log.Warn().Msg("Unable to fetch instance identity document from ec2 metadata")
metadata.InstanceID, _ = e.GetMetadataInfo(InstanceIDPath)
metadata.InstanceType, _ = e.GetMetadataInfo(InstanceTypePath)
metadata.LocalIP, _ = e.GetMetadataInfo(LocalIPPath)
metadata.AvailabilityZone, _ = e.GetMetadataInfo(AZPlacementPath)
if len(metadata.AvailabilityZone) > 1 {
metadata.Region = metadata.AvailabilityZone[0 : len(metadata.AvailabilityZone)-1]
func (e *Service) GetNodeMetadata(imdsDisabled bool) NodeMetadata {
metadata := NodeMetadata{}
if !imdsDisabled {
identityDoc, err := e.GetMetadataInfo(IdentityDocPath)
if err != nil {
log.Err(err).Msg("Unable to fetch metadata from IMDS")
return metadata
}
}
metadata.InstanceLifeCycle, _ = e.GetMetadataInfo(InstanceLifeCycle)
metadata.LocalHostname, _ = e.GetMetadataInfo(LocalHostnamePath)
metadata.PublicHostname, _ = e.GetMetadataInfo(PublicHostnamePath)
metadata.PublicIP, _ = e.GetMetadataInfo(PublicIPPath)
err = json.NewDecoder(strings.NewReader(identityDoc)).Decode(&metadata)
if err != nil {
log.Warn().Msg("Unable to fetch instance identity document from ec2 metadata")
metadata.InstanceID, _ = e.GetMetadataInfo(InstanceIDPath)
metadata.InstanceType, _ = e.GetMetadataInfo(InstanceTypePath)
metadata.LocalIP, _ = e.GetMetadataInfo(LocalIPPath)
metadata.AvailabilityZone, _ = e.GetMetadataInfo(AZPlacementPath)
if len(metadata.AvailabilityZone) > 1 {
metadata.Region = metadata.AvailabilityZone[0 : len(metadata.AvailabilityZone)-1]
}
}
metadata.InstanceLifeCycle, _ = e.GetMetadataInfo(InstanceLifeCycle)
metadata.LocalHostname, _ = e.GetMetadataInfo(LocalHostnamePath)
metadata.PublicHostname, _ = e.GetMetadataInfo(PublicHostnamePath)
metadata.PublicIP, _ = e.GetMetadataInfo(PublicIPPath)

log.Info().Interface("metadata", metadata).Msg("Startup Metadata Retrieved")
log.Info().Interface("metadata", metadata).Msg("Startup Metadata Retrieved")
}

return metadata
}
27 changes: 26 additions & 1 deletion pkg/ec2metadata/ec2metadata_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -580,13 +580,38 @@ func TestGetNodeMetadata(t *testing.T) {

// Use URL from our local test server
imds := ec2metadata.New(server.URL, 1)
nodeMetadata := imds.GetNodeMetadata()
nodeMetadata := imds.GetNodeMetadata(false)

h.Assert(t, nodeMetadata.AccountId == "", `AccountId should be empty string (only present in SQS events)`)
h.Assert(t, nodeMetadata.InstanceID == `metadata`, `Missing required NodeMetadata field InstanceID`)
h.Assert(t, nodeMetadata.InstanceLifeCycle == `metadata`, `Missing required NodeMetadata field InstanceLifeCycle`)
h.Assert(t, nodeMetadata.InstanceType == `metadata`, `Missing required NodeMetadata field InstanceType`)
h.Assert(t, nodeMetadata.LocalHostname == `metadata`, `Missing required NodeMetadata field LocalHostname`)
h.Assert(t, nodeMetadata.LocalIP == `metadata`, `Missing required NodeMetadata field LocalIP`)
h.Assert(t, nodeMetadata.PublicHostname == `metadata`, `Missing required NodeMetadata field PublicHostname`)
h.Assert(t, nodeMetadata.PublicIP == `metadata`, `Missing required NodeMetadata field PublicIP`)
h.Assert(t, nodeMetadata.AvailabilityZone == `metadata`, `Missing required NodeMetadata field AvailabilityZone`)
h.Assert(t, nodeMetadata.Region == `metadat`, `Region should equal AvailabilityZone with the final character truncated`)
}

func TestGetNodeMetadataWithIMDSDisabled(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
h.Ok(t, fmt.Errorf("IMDS was called when using Queue Processor mode"))
}))
defer server.Close()

// Use URL from our local test server that throws errors when called
imds := ec2metadata.New(server.URL, 1)
nodeMetadata := imds.GetNodeMetadata(true)

h.Assert(t, nodeMetadata.AccountId == "", "AccountId should be empty string")
h.Assert(t, nodeMetadata.InstanceID == "", "InstanceID should be empty string")
h.Assert(t, nodeMetadata.InstanceLifeCycle == "", "InstanceLifeCycle should be empty string")
h.Assert(t, nodeMetadata.InstanceType == "", "InstanceType should be empty string")
h.Assert(t, nodeMetadata.PublicHostname == "", "PublicHostname should be empty string")
h.Assert(t, nodeMetadata.PublicIP == "", "PublicIP should be empty string")
h.Assert(t, nodeMetadata.LocalHostname == "", "LocalHostname should be empty string")
h.Assert(t, nodeMetadata.LocalIP == "", "LocalIP should be empty string")
h.Assert(t, nodeMetadata.AvailabilityZone == "", "AvailabilityZone should be empty string")
h.Assert(t, nodeMetadata.Region == "", "Region should be empty string")
}