This repository has been archived by the owner on Dec 24, 2019. It is now read-only.

Commit

Add --no-scale-down option (#38)
* Add --no-scale-down option

* Please flake8
aermakov-zalando authored and hjacobs committed Oct 25, 2017
1 parent 2fa748c commit 07dc8e1
Showing 2 changed files with 30 additions and 8 deletions.
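
For orientation before the diff: the commit adds a --no-scale-down flag to the argument parser in main() and threads it through autoscale() into calculate_required_auto_scaling_group_sizes() as disable_scale_down. A minimal standalone sketch of how the new flag is expected to parse (illustrative only, not code from the commit):

import argparse

# Mirrors the argument added in main(); argparse exposes the flag as args.no_scale_down.
parser = argparse.ArgumentParser()
parser.add_argument('--no-scale-down', help='Disable scaling down', action='store_true')

assert parser.parse_args([]).no_scale_down is False              # default: scale-down stays enabled
assert parser.parse_args(['--no-scale-down']).no_scale_down is True

Because store_true defaults to False, main() now always passes an explicit disable_scale_down keyword to autoscale(), which is why the expected call in test_main further down gains disable_scale_down=False.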
24 changes: 18 additions & 6 deletions kube_aws_autoscaler/main.py
@@ -205,7 +205,8 @@ def slow_down_downscale(asg_sizes: dict, nodes_by_asg_zone: dict):


def calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone: dict, usage_by_asg_zone: dict,
buffer_percentage: dict, buffer_fixed: dict, buffer_spare_nodes: int=0):
buffer_percentage: dict, buffer_fixed: dict,
buffer_spare_nodes: int=0, disable_scale_down: bool=False):
asg_size = collections.defaultdict(int)

dump_info = STATS.get('last_info_dump', 0) < (time.time() - 600)
@@ -253,6 +254,13 @@ def calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone: dict, usage_b
' '.join([format_resource(overprovisioned[r], r).rjust(10) for r in RESOURCES])))
logger.info('{}/{}: => {} nodes required (current: {})'.format(asg_name, zone, required_nodes, len(nodes)))
STATS['last_info_dump'] = time.time()

if disable_scale_down:
current_nodes = len(nodes)
if dump_info and current_nodes > required_nodes:
logger.info('{}/{}: scaling down is not allowed, forcing {} nodes'.format(asg_name, zone, current_nodes))
required_nodes = max(required_nodes, current_nodes)

asg_size[asg_name] += required_nodes

return asg_size
@@ -305,7 +313,7 @@ def resize_auto_scaling_groups(autoscaling, asg_size: dict, ready_nodes_by_asg:
else:
try:
autoscaling.set_desired_capacity(AutoScalingGroupName=asg_name, DesiredCapacity=desired_capacity)
except:
except Exception:
logger.exception('Failed to set desired capacity {} for ASG {}'.format(desired_capacity, asg_name))
raise

@@ -349,20 +357,23 @@ def start_health_endpoint():
app.run(port=5000)


def autoscale(buffer_percentage: dict, buffer_fixed: dict, buffer_spare_nodes: int=0, include_master_nodes: bool=False, dry_run: bool=False):
def autoscale(buffer_percentage: dict, buffer_fixed: dict, buffer_spare_nodes: int=0,
include_master_nodes: bool=False, dry_run: bool=False, disable_scale_down: bool=False):
api = get_kube_api()

all_nodes = get_nodes(api, include_master_nodes)
region = list(all_nodes.values())[0]['region']
autoscaling = boto3.client('autoscaling', region)
nodes_by_asg_zone = get_nodes_by_asg_zone(autoscaling, all_nodes)

# we only consider nodes found in an ASG (old "ghost" nodes returned from Kubernetes API are ignored)
nodes_by_name = get_nodes_by_name(itertools.chain(*nodes_by_asg_zone.values()))

pods = pykube.Pod.objects(api, namespace=pykube.all)

usage_by_asg_zone = calculate_usage_by_asg_zone(pods, nodes_by_name)
asg_size = calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone, usage_by_asg_zone, buffer_percentage, buffer_fixed, buffer_spare_nodes)
asg_size = calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone, usage_by_asg_zone, buffer_percentage, buffer_fixed,
buffer_spare_nodes=buffer_spare_nodes, disable_scale_down=disable_scale_down)
asg_size = slow_down_downscale(asg_size, nodes_by_asg_zone)
ready_nodes_by_asg = get_ready_nodes_by_asg(nodes_by_asg_zone)
resize_auto_scaling_groups(autoscaling, asg_size, ready_nodes_by_asg, dry_run)
@@ -382,6 +393,7 @@ def main():
default=os.getenv('BUFFER_SPARE_NODES', 1))
parser.add_argument('--enable-healthcheck-endpoint', help='Enable Healtcheck',
action='store_true')
parser.add_argument('--no-scale-down', help='Disable scaling down', action='store_true')
for resource in RESOURCES:
parser.add_argument('--buffer-{}-percentage'.format(resource), type=float,
help='{} buffer %%'.format(resource.capitalize()),
@@ -410,8 +422,8 @@ def main():
while True:
try:
autoscale(buffer_percentage, buffer_fixed, buffer_spare_nodes=args.buffer_spare_nodes,
include_master_nodes=args.include_master_nodes, dry_run=args.dry_run)
except:
include_master_nodes=args.include_master_nodes, dry_run=args.dry_run, disable_scale_down=args.no_scale_down)
except Exception:
global Healthy
Healthy = False
logger.exception('Failed to autoscale')
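The behavioural core of the change is the clamp added to calculate_required_auto_scaling_group_sizes() above: with --no-scale-down, each ASG/zone never requests fewer nodes than it currently has, while scaling up is unaffected. A standalone restatement of that logic (the helper name and shape are illustrative, not part of the commit):

def clamp_required_nodes(required_nodes: int, current_nodes: int,
                         disable_scale_down: bool = False) -> int:
    # With scale-down disabled, keep at least the current node count.
    if disable_scale_down:
        return max(required_nodes, current_nodes)
    return required_nodes

assert clamp_required_nodes(1, 3) == 1                           # scale-down allowed
assert clamp_required_nodes(1, 3, disable_scale_down=True) == 3  # scale-down suppressed
assert clamp_required_nodes(5, 3, disable_scale_down=True) == 5  # scale-up still happens

The new test below (test_calculate_required_auto_scaling_group_sizes_no_scaledown) exercises exactly this: two idle nodes yield {'a1': 0} by default but {'a1': 2} once disable_scale_down=True is passed.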
14 changes: 12 additions & 2 deletions tests/test_autoscaler.py
@@ -71,6 +71,13 @@ def test_calculate_required_auto_scaling_group_sizes():
assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {}, {}, {}, buffer_spare_nodes=2) == {'a1': 2}


def test_calculate_required_auto_scaling_group_sizes_no_scaledown():
nodes = [{'allocatable': {'cpu': 1, 'memory': 1, 'pods': 1}, 'unschedulable': False, 'master': False},
{'allocatable': {'cpu': 1, 'memory': 1, 'pods': 1}, 'unschedulable': False, 'master': False}]
assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): nodes}, {}, {}, {}) == {'a1': 0}
assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): nodes}, {}, {}, {}, disable_scale_down=True) == {'a1': 2}


def test_calculate_required_auto_scaling_group_sizes_cordon():
node = {'name': 'mynode', 'allocatable': {'cpu': 1, 'memory': 1, 'pods': 1}, 'unschedulable': True, 'master': False, 'asg_lifecycle_state': 'InService'}
assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {}, {}, {}) == {'a1': 1}
@@ -363,7 +370,10 @@ def test_main(monkeypatch):
monkeypatch.setattr('kube_aws_autoscaler.main.autoscale', autoscale)
monkeypatch.setattr('sys.argv', ['foo', '--once', '--dry-run'])
main()
autoscale.assert_called_once_with({'memory': 10, 'pods': 10, 'cpu': 10}, {'memory': 209715200, 'pods': 10, 'cpu': 0.2}, buffer_spare_nodes=1, include_master_nodes=False, dry_run=True)
autoscale.assert_called_once_with(
{'memory': 10, 'pods': 10, 'cpu': 10},
{'memory': 209715200, 'pods': 10, 'cpu': 0.2},
buffer_spare_nodes=1, include_master_nodes=False, dry_run=True, disable_scale_down=False)

autoscale.side_effect = ValueError

@@ -412,4 +422,4 @@ def test_start_health_endpoint():
flask = app.test_client()
flask.testing = True
response = flask.get('/healthz')
assert response.status_code == 500
assert response.status_code == 500
