diff --git a/README.rst b/README.rst
index 5ec077b..20255a5 100644
--- a/README.rst
+++ b/README.rst
@@ -21,6 +21,8 @@ Goals:
 * respect Availability Zones, i.e. make sure that all AZs provide enough capacity
 * be deterministic and predictable, i.e. the ``DesiredCapacity`` is only calculated based on the current cluster state
 * scale down slowly to mitigate service disruptions, i.e. at most one node at a time
+* support "elastic" workloads like daily up/down scaling
+* support AWS Spot Fleet (not yet implemented)
 * require a minimum amount of configuration (preferably none)
 * keep it simple
 
@@ -32,6 +34,13 @@ This hack was created as a proof of concept and born out of frustration with the
 * it requires unnecessary configuration
 * the code is quite complex
 
+Disclaimer
+==========
+
+**Use at your own risk!**
+This autoscaler was only tested with Kubernetes version 1.5.2.
+There is no guarantee that it works with earlier Kubernetes versions.
+
 How it works
 ============
 
@@ -48,7 +57,7 @@ The ``autoscale`` function performs the following task:
 * iterate through every ASG/AZ combination
 
   * use the calculated resource usage (sum of resource requests) and add the resource requests of any unassigned pods (pods not scheduled on any node yet)
   * apply the configured buffer values (10% extra for CPU and memory by default)
-  * find the capacity of the weakest node
+  * find the `allocatable capacity`_ of the weakest node
   * calculate the number of required nodes by adding up the capacity of the weakest node until the sum is greater than or equal to requested+buffer for both CPU and memory
 * sum up the number of required nodes from all AZ for the ASG
@@ -99,3 +108,4 @@ The following command line options are supported:
 
 
 .. _"official" cluster-autoscaler: https://github.com/kubernetes/contrib/tree/master/cluster-autoscaler
+.. _allocatable capacity: https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md
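For illustration, the sizing rule described under "How it works" boils down to a short loop. The following is a minimal Python sketch of that calculation only, not the project's implementation (the real code in ``kube_aws_autoscaler/main.py`` below also handles cordoned nodes and buffers); the helper name ``required_node_count`` is made up for this example::

    RESOURCES = ['cpu', 'memory', 'pods']

    def required_node_count(requested_with_buffer: dict, weakest_node: dict) -> int:
        # Keep adding the weakest node's allocatable resources, one node at a
        # time, until every requested resource (including buffer) is covered.
        allocatable = {resource: 0 for resource in RESOURCES}
        nodes = 0
        while any(requested_with_buffer.get(r, 0) > allocatable[r] for r in RESOURCES):
            for r in RESOURCES:
                allocatable[r] += weakest_node['allocatable'][r]
            nodes += 1
        return nodes

Note that zero requested resources yields zero required nodes, which matches the ``{'a1': 0}`` expectations in the tests further down.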
diff --git a/kube_aws_autoscaler/main.py b/kube_aws_autoscaler/main.py
index 279a825..d8cb1a0 100755
--- a/kube_aws_autoscaler/main.py
+++ b/kube_aws_autoscaler/main.py
@@ -47,9 +47,9 @@ def parse_resource(v: str):
         return int(match.group(1)) * factor
 
 
-def get_node_capacity_tuple(node: dict):
-    capacity = node['capacity']
-    return tuple(capacity[resource] for resource in RESOURCES)
+def get_node_allocatable_tuple(node: dict):
+    allocatable = node['allocatable']
+    return tuple(allocatable[resource] for resource in RESOURCES)
 
 
 def apply_buffer(requested: dict, buffer_percentage: dict, buffer_fixed: dict):
@@ -60,11 +60,11 @@ def apply_buffer(requested: dict, buffer_percentage: dict, buffer_fixed: dict):
 
 
 def find_weakest_node(nodes):
-    return sorted(nodes, key=get_node_capacity_tuple)[0]
+    return sorted(nodes, key=get_node_allocatable_tuple)[0]
 
 
-def is_sufficient(requested: dict, capacity: dict):
-    for resource, cap in capacity.items():
+def is_sufficient(requested: dict, allocatable: dict):
+    for resource, cap in allocatable.items():
         if requested.get(resource, 0) > cap:
             return False
     return True
@@ -86,13 +86,15 @@ def get_nodes(api) -> dict:
         region = node.labels['failure-domain.beta.kubernetes.io/region']
         zone = node.labels['failure-domain.beta.kubernetes.io/zone']
         instance_type = node.labels['beta.kubernetes.io/instance-type']
-        capacity = {}
-        for key, val in node.obj['status']['capacity'].items():
-            capacity[key] = parse_resource(val)
+        allocatable = {}
+        # Use the Node Allocatable Resources to account for any kube/system reservations:
+        # https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md
+        for key, val in node.obj['status']['allocatable'].items():
+            allocatable[key] = parse_resource(val)
         instance_id = node.obj['spec']['externalID']
         obj = {'name': node.name, 'region': region, 'zone': zone, 'instance_id': instance_id, 'instance_type': instance_type,
-               'capacity': capacity,
+               'allocatable': allocatable,
                'ready': is_node_ready(node),
                'unschedulable': node.obj['spec'].get('unschedulable', False),
                'master': node.labels.get('master', 'false') == 'true'}
@@ -201,10 +203,10 @@ def calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone: dict, usage_b
         requested_with_buffer = apply_buffer(requested, buffer_percentage, buffer_fixed)
         weakest_node = find_weakest_node(nodes)
         required_nodes = 0
-        capacity = {resource: 0 for resource in RESOURCES}
-        while not is_sufficient(requested_with_buffer, capacity):
-            for resource in capacity:
-                capacity[resource] += weakest_node['capacity'][resource]
+        allocatable = {resource: 0 for resource in RESOURCES}
+        while not is_sufficient(requested_with_buffer, allocatable):
+            for resource in allocatable:
+                allocatable[resource] += weakest_node['allocatable'][resource]
             required_nodes += 1
 
         for node in nodes:
@@ -215,7 +217,7 @@ def calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone: dict, usage_b
                 required_nodes += 1
 
         overprovisioned = {resource: 0 for resource in RESOURCES}
-        for resource, value in capacity.items():
+        for resource, value in allocatable.items():
             overprovisioned[resource] = value - requested[resource]
 
         if dump_info:
@@ -226,7 +228,7 @@ def calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone: dict, usage_b
             logger.info('{}/{}: with buffer: {}'.format(asg_name, zone,
                         ' '.join([format_resource(requested_with_buffer[r], r).rjust(10) for r in RESOURCES])))
             logger.info('{}/{}: weakest node: {}'.format(asg_name, zone,
-                        ' '.join([format_resource(weakest_node['capacity'][r], r).rjust(10) for r in RESOURCES])))
+                        ' '.join([format_resource(weakest_node['allocatable'][r], r).rjust(10) for r in RESOURCES])))
             logger.info('{}/{}: overprovision: {}'.format(asg_name, zone,
                         ' '.join([format_resource(overprovisioned[r], r).rjust(10) for r in RESOURCES])))
             logger.info('{}/{}: => {} nodes required (current: {})'.format(asg_name, zone, required_nodes, len(nodes)))
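Sizing against ``allocatable`` instead of ``capacity`` matters because the kubelet may reserve CPU and memory for system daemons; raw capacity would overestimate what can actually be scheduled. As a usage sketch of the changed ``get_nodes`` path, the string quantities reported by the Kubernetes API under ``status.allocatable`` are converted into plain integers by ``parse_resource``; the expected values below come from the updated ``test_get_nodes``::

    from kube_aws_autoscaler.main import parse_resource

    # Quantities as reported by the API in status.allocatable:
    raw = {'cpu': '2', 'memory': '16Gi', 'pods': '10'}
    allocatable = {key: parse_resource(val) for key, val in raw.items()}
    assert allocatable == {'cpu': 2, 'memory': 16 * 1024 * 1024 * 1024, 'pods': 10}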
diff --git a/tests/test_autoscaler.py b/tests/test_autoscaler.py
index 220474a..5f01b3b 100644
--- a/tests/test_autoscaler.py
+++ b/tests/test_autoscaler.py
@@ -66,20 +66,20 @@ def test_calculate_usage_by_asg_zone():
 
 def test_calculate_required_auto_scaling_group_sizes():
     assert calculate_required_auto_scaling_group_sizes({}, {}, {}, {}) == {}
-    node = {'capacity': {'cpu': 1, 'memory': 1, 'pods': 1}, 'unschedulable': False, 'master': False}
+    node = {'allocatable': {'cpu': 1, 'memory': 1, 'pods': 1}, 'unschedulable': False, 'master': False}
     assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {}, {}, {}) == {'a1': 0}
     assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {('a1', 'z1'): {'cpu': 1, 'memory': 1, 'pods': 1}}, {}, {}) == {'a1': 1}
     assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {('unknown', 'unknown'): {'cpu': 1, 'memory': 1, 'pods': 1}}, {}, {}) == {'a1': 1}
 
 
 def test_calculate_required_auto_scaling_group_sizes_cordon():
-    node = {'name': 'mynode', 'capacity': {'cpu': 1, 'memory': 1, 'pods': 1}, 'unschedulable': True, 'master': False, 'asg_lifecycle_state': 'InService'}
+    node = {'name': 'mynode', 'allocatable': {'cpu': 1, 'memory': 1, 'pods': 1}, 'unschedulable': True, 'master': False, 'asg_lifecycle_state': 'InService'}
     assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {}, {}, {}) == {'a1': 1}
     assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {('a1', 'z1'): {'cpu': 1, 'memory': 1, 'pods': 1}}, {}, {}) == {'a1': 2}
 
 
 def test_calculate_required_auto_scaling_group_sizes_unschedulable_terminating():
-    node = {'name': 'mynode', 'capacity': {'cpu': 1, 'memory': 1, 'pods': 1}, 'unschedulable': True, 'master': False, 'asg_lifecycle_state': 'Terminating'}
+    node = {'name': 'mynode', 'allocatable': {'cpu': 1, 'memory': 1, 'pods': 1}, 'unschedulable': True, 'master': False, 'asg_lifecycle_state': 'Terminating'}
     # do not compensate if the instance is terminating.. (it will probably be replaced by ASG)
     assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {}, {}, {}) == {'a1': 0}
     assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {('a1', 'z1'): {'cpu': 1, 'memory': 1, 'pods': 1}}, {}, {}) == {'a1': 1}
@@ -246,7 +246,7 @@ def test_get_nodes(monkeypatch):
         'beta.kubernetes.io/instance-type': 'x1.mega'
     }
     node.obj = {
-        'status': {'capacity': {'cpu': '2', 'memory': '16Gi', 'pods': '10'}},
+        'status': {'allocatable': {'cpu': '2', 'memory': '16Gi', 'pods': '10'}},
         'spec': {'externalID': 'i-123'}
     }
@@ -257,7 +257,7 @@ def test_get_nodes(monkeypatch):
     assert get_nodes(api) == {'n1': {
         'name': 'n1', 'region': 'eu-north-1', 'zone': 'eu-north-1a',
         'instance_id': 'i-123', 'instance_type': 'x1.mega',
-        'capacity': {'cpu': 2, 'memory': 16*1024*1024*1024, 'pods': 10},
+        'allocatable': {'cpu': 2, 'memory': 16*1024*1024*1024, 'pods': 10},
         'ready': False,
         'unschedulable': False,
         'master': False}}
@@ -278,7 +278,7 @@ def test_autoscale(monkeypatch):
     get_nodes.return_value = {'n1': {
         'name': 'n1', 'region': 'eu-north-1', 'zone': 'eu-north-1a',
         'instance_id': 'i-123', 'instance_type': 'x1.mega',
-        'capacity': {'cpu': 2, 'memory': 16*1024*1024*1024, 'pods': 10},
+        'allocatable': {'cpu': 2, 'memory': 16*1024*1024*1024, 'pods': 10},
         'ready': True,
         'unschedulable': False,
         'master': False}}
@@ -309,7 +309,7 @@ def test_autoscale_node_without_asg(monkeypatch):
     get_nodes.return_value = {'n1': {
         'name': 'n1', 'region': 'eu-north-1', 'zone': 'eu-north-1a',
         'instance_id': 'i-123', 'instance_type': 'x1.mega',
-        'capacity': {'cpu': 2, 'memory': 16*1024*1024*1024, 'pods': 10},
+        'allocatable': {'cpu': 2, 'memory': 16*1024*1024*1024, 'pods': 10},
         'ready': True,
         'unschedulable': False,
         'master': False}}
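The cordon-related tests above encode the compensation rule: a cordoned (``unschedulable``) node that is still ``InService`` is compensated with one extra node, while a ``Terminating`` node is not, since the ASG is expected to replace it on its own. A usage sketch built from the same fixtures as the tests::

    from kube_aws_autoscaler.main import calculate_required_auto_scaling_group_sizes

    cordoned = {'name': 'mynode', 'allocatable': {'cpu': 1, 'memory': 1, 'pods': 1},
                'unschedulable': True, 'master': False, 'asg_lifecycle_state': 'InService'}
    terminating = dict(cordoned, asg_lifecycle_state='Terminating')

    # One extra node is requested on behalf of the cordoned node...
    assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [cordoned]}, {}, {}, {}) == {'a1': 1}
    # ...but not for the terminating one (the ASG will replace it).
    assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [terminating]}, {}, {}, {}) == {'a1': 0}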