Changed autoscaling_group to plain EC2 VMs on AWS. #2939

Merged
merged 34 commits into from
Feb 8, 2022
Changes from 15 commits
Commits
34 commits
80ba4ea
Initial rewrite of auto-scaling-groups to plain ec2 vms
seriva Jan 28, 2022
5c361af
Added VM validation
seriva Jan 28, 2022
ecec916
Removed/fixed unittests
seriva Jan 28, 2022
945db62
Updated documentation.
seriva Jan 28, 2022
3de0800
Added host sorting
seriva Jan 28, 2022
c01b5cf
Minor validation fix.
seriva Jan 28, 2022
0d9e3e1
Fix terraform recreation.
seriva Jan 28, 2022
b9562f8
Fix for inventory ordering.
seriva Jan 28, 2022
cd69120
Added support for use_network_security_groups
seriva Jan 29, 2022
7b3a2a7
Updated changelog
seriva Jan 31, 2022
11221a3
Fixed typo in every TF template.
seriva Jan 31, 2022
00d5da0
Minor spacing fixes in tests.
seriva Jan 31, 2022
97360d3
Add tests for AWS host ordering.
seriva Jan 31, 2022
6bef2c8
Merge branch 'develop' into feature/2853
seriva Jan 31, 2022
a04ba36
Fix minor typo
seriva Feb 1, 2022
574e8a1
Update cli/engine/providers/aws/InfrastructureBuilder.py
seriva Feb 2, 2022
65e17af
Update cli/engine/providers/azure/InfrastructureBuilder.py
seriva Feb 2, 2022
3572726
Fixed minor typo.
seriva Feb 2, 2022
a610f64
Fixed typo.
seriva Feb 4, 2022
e6ff948
Fixed machine naming
seriva Feb 4, 2022
ca10171
Fixed names for security groups
seriva Feb 4, 2022
4b3d3e7
Minor typo fix.
seriva Feb 7, 2022
508a5c9
Add os volume name label
seriva Feb 7, 2022
8b30849
Synced tags across resources.
seriva Feb 7, 2022
7e29c25
Added support for disks
seriva Feb 7, 2022
3e9a5ae
Tagged datadisks
seriva Feb 7, 2022
d970e9c
Minor fix for device name
seriva Feb 7, 2022
472714a
Added name component for data disks
seriva Feb 7, 2022
d2dab59
Sync disk naming with Azure disk naming.
seriva Feb 7, 2022
1ed31a9
Fixed line indentations;)
seriva Feb 8, 2022
119eccd
Fixed to typo.
seriva Feb 8, 2022
1423b57
Removed whitespace.
seriva Feb 8, 2022
fa0d7fd
Updated DoD for bug reports.
seriva Feb 8, 2022
51e9332
Use index0 over index for datadisks
seriva Feb 8, 2022
23 changes: 14 additions & 9 deletions cli/engine/providers/aws/APIProxy.py
@@ -2,7 +2,7 @@

from cli.helpers.doc_list_helpers import select_single
from cli.helpers.objdict_helpers import dict_to_objdict
from cli.models.AnsibleHostModel import AnsibleHostModel
from cli.models.AnsibleHostModel import AnsibleOrderedHostModel


class APIProxy:
@@ -26,9 +26,7 @@ def get_ips_for_feature(self, component_key):
cluster_name = self.cluster_model.specification.name.lower()
look_for_public_ip = self.cluster_model.specification.cloud.use_public_ips
vpc_id = self.get_vpc_id()

ec2 = self.session.resource('ec2')
running_instances = ec2.instances.filter(
running_instances = self.session.resource('ec2').instances.filter(
Filters=[{
'Name': 'instance-state-name',
'Values': ['running']
@@ -37,21 +35,28 @@ def get_ips_for_feature(self, component_key):
'Values': [vpc_id]
},
{
'Name': 'tag:'+component_key,
'Values': ['']
'Name': 'tag:component_key',
'Values': [component_key]
},
{
'Name': 'tag:cluster_name',
'Values': [cluster_name]
}]
)

result = []
result: List[AnsibleOrderedHostModel] = []

for instance in running_instances:
hostname = ''
for tag in instance.tags:
if tag['Key'] == 'name':
hostname = tag['Value']
if look_for_public_ip:
result.append(AnsibleHostModel(instance.public_dns_name, instance.public_ip_address))
result.append(AnsibleOrderedHostModel(hostname, instance.public_ip_address))
else:
result.append(AnsibleHostModel(instance.private_dns_name, instance.private_ip_address))
result.append(AnsibleOrderedHostModel(hostname, instance.private_ip_address))

result.sort()
return result

def get_image_id(self, os_full_name):
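
Note on the change above: get_ips_for_feature now builds AnsibleOrderedHostModel entries from the instance 'name' tag and relies on result.sort() to put hosts into a stable, hostname-based order. The model class itself is not part of this diff; a minimal sketch of how such an ordered host model could look (a hypothetical dataclass, not the actual Epiphany implementation):

from dataclasses import dataclass, field

@dataclass(order=True)
class AnsibleOrderedHostModel:
    # Hypothetical sketch only; the real class lives in cli/models/AnsibleHostModel.py.
    name: str                       # hostname taken from the instance 'name' tag
    ip: str = field(compare=False)  # IP address, excluded from ordering

hosts = [AnsibleOrderedHostModel('prefix-cluster-vm-1-kubernetes_node', '10.1.0.5'),
         AnsibleOrderedHostModel('prefix-cluster-vm-0-kubernetes_node', '10.1.0.4')]
hosts.sort()  # sorted by hostname, so inventory order follows the VM index

With ordering keyed on the hostname, hosts created as vm-0, vm-1, ... land in the same inventory positions on every run.
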
176 changes: 79 additions & 97 deletions cli/engine/providers/aws/InfrastructureBuilder.py
@@ -22,9 +22,15 @@ def __init__(self, docs, manifest_docs=[]):
self.cluster_name = self.cluster_model.specification.name.lower()
self.cluster_prefix = self.cluster_model.specification.prefix.lower()
self.use_network_security_groups = self.cluster_model.specification.cloud.network.use_network_security_groups
self.use_public_ips = self.cluster_model.specification.cloud.use_public_ips
self.docs = docs
self.manifest_docs = manifest_docs

# If there are no security groups Ansible provisioning will fail because
# SSH is not allowed then with public IPs on AWS.
if not(self.use_network_security_groups) and self.use_public_ips:
self.logger.warning('Use of security groups has been disabled and public IPs are used. Ansible run will fail because SSH will not be allowed.')

def run(self):
infrastructure = []

@@ -49,62 +55,52 @@ def run(self):

efs_config = self.get_efs_config()

if not(self.use_network_security_groups):
self.logger.warning('The "use_network_security_groups" flag is currently ignored on AWS')

for component_key, component_value in self.cluster_model.specification.components.items():
if component_value['count'] < 1:
vm_count = component_value['count']
if vm_count < 1:
continue

subnets_to_create = []
security_groups_to_create = []
subnet_index = 0
asg_index = 0
for subnet_definition in component_value.subnets: # todo extract to another method or class
subnet = select_first(infrastructure, lambda item: item.kind == 'infrastructure/subnet' and
item.specification.cidr_block == subnet_definition['address_pool'])
security_group = select_first(infrastructure, lambda item: item.kind == 'infrastructure/security-group' and
item.specification.cidr_block == subnet_definition['address_pool'])

if subnet is None:
subnet = self.get_subnet(subnet_definition, component_key, vpc_name, subnet_index)
infrastructure.append(subnet)

security_group = self.get_security_group(subnet, component_key, vpc_name, subnet_index)
infrastructure.append(security_group)

route_table_association = self.get_route_table_association(route_table.specification.name,
component_key,
subnet.specification.name, subnet_index)
infrastructure.append(route_table_association)
subnet_index += 1

subnets_to_create.append(subnet)
security_groups_to_create.append(security_group)
# The vm config also contains some other stuff we use for network and security config.
# So get it here and pass it along.
vm_config = self.get_virtual_machine(component_value)

autoscaling_group = self.get_autoscaling_group(component_key, component_value, subnets_to_create, asg_index)
# For now only one subnet per component.
if (len(component_value.subnets) > 1):
self.logger.warning('On AWS only one subnet per component is supported for now. Taking first and ignoring others.')

for security_group in security_groups_to_create:
for rule in autoscaling_group.specification.security.rules:
if not self.rule_exists_in_list(security_group.specification.rules, rule):
security_group.specification.rules.append(rule)
subnet_definition = component_value.subnets[0]
subnet = select_first(infrastructure, lambda item: item.kind == 'infrastructure/subnet' and
item.specification.cidr_block == subnet_definition['address_pool'])
security_group = select_first(infrastructure, lambda item: item.kind == 'infrastructure/security-group' and
item.specification.cidr_block == subnet_definition['address_pool'])

launch_configuration = self.get_launch_configuration(autoscaling_group, component_key,
security_groups_to_create)
if subnet is None:
subnet = self.get_subnet(subnet_definition, component_key, vpc_name, 0)
infrastructure.append(subnet)

launch_configuration.specification.key_name = public_key_config.specification.key_name
if vm_config.specification.mount_efs:
self.efs_add_mount_target_config(efs_config, subnet)

self.set_image_id_for_launch_configuration(self.cluster_model, self.docs, launch_configuration,
autoscaling_group)
autoscaling_group.specification.launch_configuration = launch_configuration.specification.name
route_table_association = self.get_route_table_association(route_table.specification.name,
component_key,
subnet.specification.name, 0)
infrastructure.append(route_table_association)

if autoscaling_group.specification.mount_efs:
for subnet in subnets_to_create:
self.efs_add_mount_target_config(efs_config, subnet)
if self.use_network_security_groups:
security_group = self.get_security_group(subnet, component_key, vpc_name, 0)
for rule in vm_config.specification.security.rules:
if not self.rule_exists_in_list(security_group.specification.rules, rule):
security_group.specification.rules.append(rule)
infrastructure.append(security_group)

infrastructure.append(autoscaling_group)
infrastructure.append(launch_configuration)
asg_index += 1
for index in range(vm_count):
vm = self.get_vm(component_key,
vm_config,
subnet,
public_key_config,
security_group,
index)
infrastructure.append(vm)

if self.has_efs_any_mounts(efs_config):
infrastructure.append(efs_config)
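
The rewritten loop above replaces the single autoscaling group per component with component_value['count'] individual virtual-machine documents, each carrying its index in the resource name. A rough sketch of the resulting names (the exact format returned by resource_name() is an assumption here, not taken from this diff):

# Illustration only: the exact string produced by resource_name() is an assumption.
def example_vm_names(prefix, cluster_name, component_key, vm_count):
    return [f"{prefix}-{cluster_name}-vm-{index}-{component_key}"
            for index in range(vm_count)]

print(example_vm_names('pre', 'mycluster', 'kubernetes_master', 3))
# ['pre-mycluster-vm-0-kubernetes_master', 'pre-mycluster-vm-1-kubernetes_master', 'pre-mycluster-vm-2-kubernetes_master']
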
@@ -136,32 +132,26 @@ def get_efs_config(self):
efs_config.specification.name = resource_name(self.cluster_prefix, self.cluster_name, 'efs')
return efs_config

def get_autoscaling_group(self, component_key, component_value, subnets_to_create, index):
autoscaling_group = dict_to_objdict(deepcopy(self.get_virtual_machine(component_value)))
autoscaling_group.specification.cluster_name = self.cluster_name
autoscaling_group.specification.name = resource_name(self.cluster_prefix, self.cluster_name, 'asg' + '-' + str(index), component_key)
autoscaling_group.specification.count = component_value.count
autoscaling_group.specification.subnet_names = [s.specification.name for s in subnets_to_create]
autoscaling_group.specification.availability_zones = [s.specification.availability_zone for s in subnets_to_create]
autoscaling_group.specification.tags.append({'cluster_name': self.cluster_name})
autoscaling_group.specification.tags.append({component_key: ''})
return autoscaling_group

def get_launch_configuration(self, autoscaling_group, component_key, security_groups_to_create):
launch_configuration = self.get_config_or_default(self.docs, 'infrastructure/launch-configuration')
launch_configuration.specification.name = resource_name(self.cluster_prefix, self.cluster_name, 'launch-config', component_key)
launch_configuration.specification.size = autoscaling_group.specification.size
launch_configuration.specification.security_groups = [s.specification.name for s in security_groups_to_create]
launch_configuration.specification.disks = autoscaling_group.specification.disks
launch_configuration.specification.ebs_optimized = autoscaling_group.specification.ebs_optimized
launch_configuration.specification.associate_public_ip = self.cluster_model.specification.cloud.use_public_ips
return launch_configuration
def get_vm(self, component_key, vm_config, subnet, public_key_config, security_group, index):
vm = dict_to_objdict(deepcopy(vm_config))
vm.specification.name = resource_name(self.cluster_prefix, self.cluster_name, 'vm' + '-' + str(index), component_key)
vm.specification.cluster_name = self.cluster_name
vm.specification.component_key = component_key
vm.specification.subnet_name = subnet.specification.name
vm.specification.key_name = public_key_config.specification.key_name
vm.specification.use_network_security_groups = self.use_network_security_groups
if self.use_network_security_groups:
vm.specification.security_groups = [security_group.specification.name]
vm.specification.associate_public_ip = self.cluster_model.specification.cloud.use_public_ips
with APIProxy(self.cluster_model, []) as proxy:
vm.specification.image_id = proxy.get_image_id(vm.specification.os_full_name)
return vm

def get_subnet(self, subnet_definition, component_key, vpc_name, index):
subnet = self.get_config_or_default(self.docs, 'infrastructure/subnet')
subnet.specification.vpc_name = vpc_name
subnet.specification.cidr_block = subnet_definition['address_pool']
subnet.specification.availability_zone = subnet_definition['availability_zone']

subnet.specification.name = resource_name(self.cluster_prefix, self.cluster_name, 'subnet' + '-' + str(index), component_key)
subnet.specification.cluster_name = self.cluster_name
return subnet
@@ -220,26 +210,25 @@ def get_public_key(self):
return public_key_config

def add_security_rules_inbound_efs(self, infrastructure, security_group):
ags_allowed_to_efs = select_all(infrastructure, lambda item: item.kind == 'infrastructure/virtual-machine' and
vm_allowed_to_efs = select_all(infrastructure, lambda item: item.kind == 'infrastructure/virtual-machine' and
item.specification.authorized_to_efs)

for asg in ags_allowed_to_efs:
for subnet_in_asg in asg.specification.subnet_names:
subnet = select_single(infrastructure, lambda item: item.kind == 'infrastructure/subnet' and
item.specification.name == subnet_in_asg)

rule_defined = select_first(security_group.specification.rules, lambda item: item.source_address_prefix == subnet.specification.cidr_block
and item.destination_port_range == 2049)
if rule_defined is None:
rule = self.get_config_or_default(self.docs, 'infrastructure/security-group-rule')
rule.specification.name = 'sg-rule-nfs-default-from-'+subnet.specification.name
rule.specification.description = 'NFS inbound for '+subnet.specification.name
rule.specification.direction = 'ingress'
rule.specification.protocol = 'tcp'
rule.specification.destination_port_range = "2049"
rule.specification.source_address_prefix = subnet.specification.cidr_block
rule.specification.destination_address_prefix = '*'
security_group.specification.rules.append(rule.specification)
for vm in vm_allowed_to_efs:
subnet = select_single(infrastructure, lambda item: item.kind == 'infrastructure/subnet' and
item.specification.name == vm.specification.subnet_name)

rule_defined = select_first(security_group.specification.rules, lambda item: item.source_address_prefix == subnet.specification.cidr_block
and item.destination_port_range == 2049)
if rule_defined is None:
rule = self.get_config_or_default(self.docs, 'infrastructure/security-group-rule')
rule.specification.name = 'sg-rule-nfs-default-from-'+subnet.specification.name
rule.specification.description = 'NFS inbound for '+subnet.specification.name
rule.specification.direction = 'ingress'
rule.specification.protocol = 'tcp'
rule.specification.destination_port_range = "2049"
rule.specification.source_address_prefix = subnet.specification.cidr_block
rule.specification.destination_address_prefix = '*'
security_group.specification.rules.append(rule.specification)

rules = []
for rule in security_group.specification.rules:
Expand Down Expand Up @@ -287,25 +276,17 @@ def get_virtual_machine(self, component_value):

@staticmethod
def efs_add_mount_target_config(efs_config, subnet):
target = select_first(efs_config.specification.mount_targets,
lambda item: item['availability_zone'] == subnet.specification.availability_zone)
if target is None:
efs_config.specification.mount_targets.append(
{'name': 'efs-'+subnet.specification.name+'-mount',
'subnet_name': subnet.specification.name,
'availability_zone': subnet.specification.availability_zone})
efs_config.specification.mount_targets.append(
{'name': 'efs-'+subnet.specification.name+'-mount',
'subnet_name': subnet.specification.name})


@staticmethod
def has_efs_any_mounts(efs_config):
if len(efs_config.specification.mount_targets) > 0:
return True
return False

@staticmethod
def set_image_id_for_launch_configuration(cluster_model, docs, launch_configuration, autoscaling_group):
with APIProxy(cluster_model, docs) as proxy:
image_id = proxy.get_image_id(autoscaling_group.specification.os_full_name)
launch_configuration.specification.image_id = image_id

@staticmethod
def get_config_or_default(docs, kind):
@@ -315,6 +296,7 @@ def get_config_or_default(docs, kind):
config['version'] = VERSION
return config


@staticmethod
def rule_exists_in_list(rule_list, rule_to_check):
for rule in rule_list:
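
For reference, add_security_rules_inbound_efs now looks up each EFS-authorized VM's subnet via vm.specification.subnet_name instead of iterating autoscaling-group subnet lists, and appends an NFS ingress rule per subnet CIDR when one is not already present. A sketch of the rule document it produces (field names come from the diff above; the concrete values are made up):

# Example rule document for a subnet with CIDR 10.1.1.0/24; field names are taken
# from the diff above, the concrete values are illustrative.
nfs_rule = {
    'name': 'sg-rule-nfs-default-from-prefix-cluster-subnet-0-kubernetes_node',
    'description': 'NFS inbound for prefix-cluster-subnet-0-kubernetes_node',
    'direction': 'ingress',
    'protocol': 'tcp',
    'destination_port_range': '2049',
    'source_address_prefix': '10.1.1.0/24',
    'destination_address_prefix': '*',
}
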
14 changes: 5 additions & 9 deletions cli/engine/providers/azure/InfrastructureBuilder.py
@@ -24,6 +24,11 @@ def __init__(self, docs, manifest_docs=[]):
self.docs = docs
self.manifest_docs = manifest_docs

# If there are no security groups Ansible provisioning will fail because
# SSH is not allowed then with public IPs on Azure.
if not(self.use_network_security_groups) and self.use_public_ips:
self.logger.warning('Use of security groups has been disabled and public IPs are used. Ansible run will fail because SSH will not be allowed.')

# Check if there is a hostname_domain_extension we already applied and we want to retain.
# The same as VM images we want to preserve hostname_domain_extension over versions.
self.hostname_domain_extension = self.cluster_model.specification.cloud.hostname_domain_extension
@@ -61,19 +66,10 @@ def run(self):
# Set property that controls cloud-init.
vm_config.specification['use_cloud_init_custom_data'] = cloud_init_custom_data.specification.enabled

# If there are no security groups Ansible provisioning will fail because
# SSH is not allowed then with public IPs on Azure.
if not(self.use_network_security_groups) and self.use_public_ips:
self.logger.warning('Use of security groups has been disabled and public IPs are used. Ansible run will fail because SSH will not be allowed.')

# For now only one subnet per component.
if (len(component_value.subnets) > 1):
self.logger.warning('On Azure only one subnet per component is supported for now. Taking first and ignoring others.')

# Add message for ignoring availability zones if present.
if 'availability_zone' in component_value.subnets[0]:
self.logger.warning('On Azure availability_zones are not supported yet. Ignoring definition.')

subnet_definition = component_value.subnets[0]
subnet = select_first(infrastructure, lambda item: item.kind == 'infrastructure/subnet' and
item.specification.address_prefix == subnet_definition['address_pool'])
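
The Azure change mirrors the AWS one: the security-group/public-IP warning moves out of the per-component run() loop and into __init__, so it is emitted once per build instead of once per component. A minimal, self-contained illustration of that pattern (not Epiphany code):

# Minimal, standalone illustration (not Epiphany code): a check performed in
# __init__ logs the warning once, while a check inside the per-component loop
# in run() would repeat it for every component.
import logging

class Builder:
    def __init__(self, use_network_security_groups, use_public_ips):
        self.logger = logging.getLogger(__name__)
        if not use_network_security_groups and use_public_ips:
            self.logger.warning('Security groups disabled while public IPs are in use; SSH access may be blocked.')

    def run(self, components):
        for component in components:  # no repeated warning here
            ...
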