From 2965ddf0eecf1c654472b0254c2437f30fd179a8 Mon Sep 17 00:00:00 2001 From: lnhanks <67074258+lnhanks@users.noreply.github.com> Date: Wed, 13 Sep 2023 07:35:03 -0700 Subject: [PATCH] Only metrics (#2557) Prometheus metrics for capturing ENI IP usage and no available IP address errors Co-authored-by: Lindsay Hanks --- pkg/ipamd/datastore/data_store.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/pkg/ipamd/datastore/data_store.go b/pkg/ipamd/datastore/data_store.go index 410ba9f3c6..8adc2965ab 100644 --- a/pkg/ipamd/datastore/data_store.go +++ b/pkg/ipamd/datastore/data_store.go @@ -122,6 +122,19 @@ var ( }, []string{"cidr"}, ) + noAvailableIPAddrs = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "awscni_err_no_avail_addrs", + Help: "The number of pod IP assignments that fail due to no available IP addresses", + }, + ) + eniIPsInUse = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "awscni_eni_util", + Help: "The number of allocated ips partitioned by eni", + }, + []string{"eni"}, + ) prometheusRegistered = false ) @@ -344,6 +357,8 @@ func prometheusRegister() { prometheus.MustRegister(forceRemovedIPs) prometheus.MustRegister(totalPrefixes) prometheus.MustRegister(ipsPerCidr) + prometheus.MustRegister(noAvailableIPAddrs) + prometheus.MustRegister(eniIPsInUse) prometheusRegistered = true } } @@ -437,6 +452,8 @@ func (ds *DataStore) ReadBackingStore(isv6Enabled bool) error { cidr.IPAddresses[ipAddr.String()] = addr ds.assignPodIPAddressUnsafe(addr, allocation.IPAMKey, allocation.Metadata, time.Unix(0, allocation.AllocationTimestamp)) ds.log.Debugf("Recovered %s => %s/%s", allocation.IPAMKey, eni.ID, addr.Address) + // Increment ENI IP usage upon finding assigned ips + eniIPsInUse.WithLabelValues(eni.ID).Inc() // Update prometheus for ips per cidr // Secondary IP mode will have /32:1 and Prefix mode will have /28: ipsPerCidr.With(prometheus.Labels{"cidr": cidr.Cidr.String()}).Inc() @@ -522,6 +539,8 @@ func (ds *DataStore) AddENI(eniID string, deviceNumber int, isPrimary, isTrunk, AvailableIPv4Cidrs: make(map[string]*CidrInfo)} enis.Set(float64(len(ds.eniPool))) + // Initialize ENI IPs In Use to 0 when an ENI is created + eniIPsInUse.WithLabelValues(eniID).Set(0) return nil } @@ -711,9 +730,12 @@ func (ds *DataStore) AssignPodIPv6Address(ipamKey IPAMKey, ipamMetadata IPAMMeta delete(V6Cidr.IPAddresses, addr.Address) return "", -1, err } + // Increment ENI IP usage on pod IPv6 allocation + eniIPsInUse.WithLabelValues(eni.ID).Inc() return addr.Address, eni.DeviceNumber, nil } } + noAvailableIPAddrs.Inc() return "", -1, errors.New("assignPodIPv6AddressUnsafe: no available IP addresses") } @@ -776,11 +798,14 @@ func (ds *DataStore) AssignPodIPv4Address(ipamKey IPAMKey, ipamMetadata IPAMMeta ipsPerCidr.With(prometheus.Labels{"cidr": availableCidr.Cidr.String()}).Dec() return "", -1, err } + // Increment ENI IP usage on pod IPv4 allocation + eniIPsInUse.WithLabelValues(eni.ID).Inc() return addr.Address, eni.DeviceNumber, nil } ds.log.Debugf("AssignPodIPv4Address: ENI %s does not have available addresses", eni.ID) } + noAvailableIPAddrs.Inc() ds.log.Errorf("DataStore has no available IP/Prefix addresses") return "", -1, errors.New("assignPodIPv4AddressUnsafe: no available IP/Prefix addresses") } @@ -1072,6 +1097,8 @@ func (ds *DataStore) RemoveUnusedENIFromStore(warmIPTarget, minimumIPTarget, war // Prometheus update enis.Set(float64(len(ds.eniPool))) + // Delete ENI IPs In Use when ENI is removed + eniIPsInUse.DeleteLabelValues(removableENI) totalIPs.Set(float64(ds.total)) return removableENI } @@ -1126,6 +1153,8 @@ func (ds *DataStore) RemoveENIFromDataStore(eniID string, force bool) error { // Prometheus gauge enis.Set(float64(len(ds.eniPool))) + // Delete ENI IPs In Use when ENI is removed + eniIPsInUse.DeleteLabelValues(eniID) return nil } @@ -1166,6 +1195,8 @@ func (ds *DataStore) UnassignPodIPAddress(ipamKey IPAMKey) (e *ENI, ip string, d ipsPerCidr.With(prometheus.Labels{"cidr": availableCidr.Cidr.String()}).Dec() ds.log.Infof("UnassignPodIPAddress: sandbox %s's ipAddr %s, DeviceNumber %d", ipamKey, addr.Address, eni.DeviceNumber) + // Decrement ENI IP usage when a pod is deallocated + eniIPsInUse.WithLabelValues(eni.ID).Dec() return eni, addr.Address, eni.DeviceNumber, nil }