Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport #6617 ionoscloud: Update ionos-cloud sdk-go and add metrics into CA 1.29 #6629

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion cluster-autoscaler/cloudprovider/ionoscloud/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,18 @@ Store the token in a secret:
kubectl create secret generic cloud-config --from-literal=token=MY_TOKEN
```

Edit [`example-values.yaml`](./examples-values.yaml) and deploy using helm:
Edit [`example-values.yaml`](./example-values.yaml) and deploy using helm:

```console
helm install ionoscloud-cluster-autoscaler autoscaler/cluster-autoscaler -f example-values.yaml
```

### Configuration

The `IONOS_ADDITIONAL_HEADERS` environment variable can be used to configure the client to send additional headers on
each Ionos Cloud API request, such as `X-Contract-Number`. This can be useful for users with multiple contracts.
The format is a semicolon-separated list of key:value pairs, e.g. `IONOS_ADDITIONAL_HEADERS="X-Contract-Number:1234657890"`.

## Development

The unit tests use mocks generated by [mockery](https://github.com/vektra/mockery/v2). To update them run:
Expand All @@ -40,6 +46,11 @@ mockery --inpackage --testonly --case snake --boilerplate-file ../../../hack/boi
mockery --inpackage --testonly --case snake --boilerplate-file ../../../hack/boilerplate/boilerplate.generatego.txt --name IonosCloudManager
```

### Debugging

The global logging verbosity controlled by the `--v` flag is passed on to the Ionos Cloud SDK client logger.
Verbosity 5 maps to `Debug` and 7 to `Trace`. **Do not enable this in production, as it will print full request and response data.**

### Build an image

Build and push a docker image in the `cluster-autoscaler` directory:
Expand Down
84 changes: 26 additions & 58 deletions cluster-autoscaler/cloudprovider/ionoscloud/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,20 @@ limitations under the License.
package ionoscloud

import (
"maps"
"sync"
"time"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/klog/v2"
)

const nodeGroupCacheEntryTimeout = 2 * time.Minute

type nodeGroupCacheEntry struct {
data cloudprovider.NodeGroup
ts time.Time
}

var timeNow = time.Now

// IonosCache caches resources to reduce API calls.
// Cached state includes autoscaling limits, sizes and target sizes, a mapping of instances to node
// groups, and a simple lock mechanism to prevent invalid node group writes.
type IonosCache struct {
mutex sync.Mutex

nodeGroups map[string]nodeGroupCacheEntry
nodeGroups map[string]cloudprovider.NodeGroup
nodesToNodeGroups map[string]string
nodeGroupSizes map[string]int
nodeGroupTargetSizes map[string]int
Expand All @@ -49,11 +40,11 @@ type IonosCache struct {
// NewIonosCache initializes a new IonosCache.
func NewIonosCache() *IonosCache {
return &IonosCache{
nodeGroups: map[string]nodeGroupCacheEntry{},
nodesToNodeGroups: map[string]string{},
nodeGroupSizes: map[string]int{},
nodeGroupTargetSizes: map[string]int{},
nodeGroupLockTable: map[string]bool{},
nodeGroups: make(map[string]cloudprovider.NodeGroup),
nodesToNodeGroups: make(map[string]string),
nodeGroupSizes: make(map[string]int),
nodeGroupTargetSizes: make(map[string]int),
nodeGroupLockTable: make(map[string]bool),
}
}

Expand All @@ -62,15 +53,7 @@ func (cache *IonosCache) AddNodeGroup(newPool cloudprovider.NodeGroup) {
cache.mutex.Lock()
defer cache.mutex.Unlock()

cache.nodeGroups[newPool.Id()] = nodeGroupCacheEntry{data: newPool}
}

func (cache *IonosCache) removeNodesForNodeGroupNoLock(id string) {
for nodeId, nodeGroupId := range cache.nodesToNodeGroups {
if nodeGroupId == id {
delete(cache.nodesToNodeGroups, nodeId)
}
}
cache.nodeGroups[newPool.Id()] = newPool
}

// RemoveInstanceFromCache deletes an instance and its respective mapping to the node group from
Expand All @@ -79,38 +62,39 @@ func (cache *IonosCache) RemoveInstanceFromCache(id string) {
cache.mutex.Lock()
defer cache.mutex.Unlock()

klog.V(5).Infof("Removed instance %s from cache", id)
nodeGroupId := cache.nodesToNodeGroups[id]
delete(cache.nodesToNodeGroups, id)
cache.updateNodeGroupTimestampNoLock(nodeGroupId)
if _, ok := cache.nodesToNodeGroups[id]; ok {
delete(cache.nodesToNodeGroups, id)
klog.V(5).Infof("Removed instance %s from cache", id)
}
}

// SetInstancesCacheForNodeGroup overwrites cached instances and mappings for a node group.
func (cache *IonosCache) SetInstancesCacheForNodeGroup(id string, instances []cloudprovider.Instance) {
cache.mutex.Lock()
defer cache.mutex.Unlock()

cache.removeNodesForNodeGroupNoLock(id)
maps.DeleteFunc(cache.nodesToNodeGroups, func(_, nodeGroupID string) bool {
return nodeGroupID == id
})
cache.setInstancesCacheForNodeGroupNoLock(id, instances)
}

func (cache *IonosCache) setInstancesCacheForNodeGroupNoLock(id string, instances []cloudprovider.Instance) {
for _, instance := range instances {
nodeId := convertToNodeId(instance.Id)
cache.nodesToNodeGroups[nodeId] = id
nodeID := convertToNodeID(instance.Id)
cache.nodesToNodeGroups[nodeID] = id
}
cache.updateNodeGroupTimestampNoLock(id)
}

// GetNodeGroupIds returns an unsorted list of cached node group ids.
func (cache *IonosCache) GetNodeGroupIds() []string {
// GetNodeGroupIDs returns an unsorted list of cached node group ids.
func (cache *IonosCache) GetNodeGroupIDs() []string {
cache.mutex.Lock()
defer cache.mutex.Unlock()

return cache.getNodeGroupIds()
return cache.getNodeGroupIDs()
}

func (cache *IonosCache) getNodeGroupIds() []string {
func (cache *IonosCache) getNodeGroupIDs() []string {
ids := make([]string, 0, len(cache.nodeGroups))
for id := range cache.nodeGroups {
ids = append(ids, id)
Expand All @@ -125,26 +109,26 @@ func (cache *IonosCache) GetNodeGroups() []cloudprovider.NodeGroup {

nodeGroups := make([]cloudprovider.NodeGroup, 0, len(cache.nodeGroups))
for id := range cache.nodeGroups {
nodeGroups = append(nodeGroups, cache.nodeGroups[id].data)
nodeGroups = append(nodeGroups, cache.nodeGroups[id])
}
return nodeGroups
}

// GetNodeGroupForNode returns the node group for the given node.
// Returns nil if either the mapping or the node group is not cached.
func (cache *IonosCache) GetNodeGroupForNode(nodeId string) cloudprovider.NodeGroup {
func (cache *IonosCache) GetNodeGroupForNode(nodeID string) cloudprovider.NodeGroup {
cache.mutex.Lock()
defer cache.mutex.Unlock()

id, found := cache.nodesToNodeGroups[nodeId]
nodeGroupID, found := cache.nodesToNodeGroups[nodeID]
if !found {
return nil
}
entry, found := cache.nodeGroups[id]
nodeGroup, found := cache.nodeGroups[nodeGroupID]
if !found {
return nil
}
return entry.data
return nodeGroup
}

// TryLockNodeGroup tries to write a node group lock entry.
Expand Down Expand Up @@ -219,19 +203,3 @@ func (cache *IonosCache) InvalidateNodeGroupTargetSize(id string) {

delete(cache.nodeGroupTargetSizes, id)
}

// NodeGroupNeedsRefresh returns true when the instances for the given node group have not been
// updated for more than 2 minutes.
func (cache *IonosCache) NodeGroupNeedsRefresh(id string) bool {
cache.mutex.Lock()
defer cache.mutex.Unlock()

return timeNow().Sub(cache.nodeGroups[id].ts) > nodeGroupCacheEntryTimeout
}

func (cache *IonosCache) updateNodeGroupTimestampNoLock(id string) {
if entry, ok := cache.nodeGroups[id]; ok {
entry.ts = timeNow()
cache.nodeGroups[id] = entry
}
}
35 changes: 4 additions & 31 deletions cluster-autoscaler/cloudprovider/ionoscloud/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,11 @@ package ionoscloud

import (
"testing"
"time"

"github.com/stretchr/testify/require"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
)

func newCacheEntry(data cloudprovider.NodeGroup, ts time.Time) nodeGroupCacheEntry {
return nodeGroupCacheEntry{data: data, ts: ts}
}

func TestCache_AddNodeGroup(t *testing.T) {
cache := NewIonosCache()
require.Empty(t, cache.GetNodeGroups())
Expand All @@ -36,17 +31,14 @@ func TestCache_AddNodeGroup(t *testing.T) {
}

func TestCache_RemoveInstanceFromCache(t *testing.T) {
firstTime := timeNow().Add(-2*time.Minute - 1*time.Second)
cache := NewIonosCache()
cache.nodeGroups["2"] = newCacheEntry(&nodePool{id: "2"}, firstTime)
cache.nodeGroups["2"] = &nodePool{id: "2"}
cache.nodesToNodeGroups["b2"] = "2"

require.NotNil(t, cache.GetNodeGroupForNode("b2"))
require.True(t, cache.NodeGroupNeedsRefresh("2"))

cache.RemoveInstanceFromCache("b2")
require.Nil(t, cache.GetNodeGroupForNode("b2"))
require.False(t, cache.NodeGroupNeedsRefresh("2"))
}

func TestCache_SetInstancesCacheForNodeGroup(t *testing.T) {
Expand All @@ -65,11 +57,11 @@ func TestCache_SetInstancesCacheForNodeGroup(t *testing.T) {

func TestCache_GetNodeGroupIDs(t *testing.T) {
cache := NewIonosCache()
require.Empty(t, cache.GetNodeGroupIds())
require.Empty(t, cache.GetNodeGroupIDs())
cache.AddNodeGroup(&nodePool{id: "1"})
require.Equal(t, []string{"1"}, cache.GetNodeGroupIds())
require.Equal(t, []string{"1"}, cache.GetNodeGroupIDs())
cache.AddNodeGroup(&nodePool{id: "2"})
require.ElementsMatch(t, []string{"1", "2"}, cache.GetNodeGroupIds())
require.ElementsMatch(t, []string{"1", "2"}, cache.GetNodeGroupIDs())
}

func TestCache_GetNodeGroups(t *testing.T) {
Expand Down Expand Up @@ -139,22 +131,3 @@ func TestCache_GetSetNodeGroupTargetSize(t *testing.T) {
require.False(t, found)
require.Zero(t, size)
}

func TestCache_NodeGroupNeedsRefresh(t *testing.T) {
fixedTime := time.Now().Round(time.Second)
timeNow = func() time.Time { return fixedTime }
defer func() { timeNow = time.Now }()

cache := NewIonosCache()
require.True(t, cache.NodeGroupNeedsRefresh("test"))

cache.AddNodeGroup(&nodePool{id: "test"})
require.True(t, cache.NodeGroupNeedsRefresh("test"))
cache.SetInstancesCacheForNodeGroup("test", nil)
require.False(t, cache.NodeGroupNeedsRefresh("test"))

timeNow = func() time.Time { return fixedTime.Add(nodeGroupCacheEntryTimeout) }
require.False(t, cache.NodeGroupNeedsRefresh("test"))
timeNow = func() time.Time { return fixedTime.Add(nodeGroupCacheEntryTimeout + 1*time.Second) }
require.True(t, cache.NodeGroupNeedsRefresh("test"))
}
Loading
Loading