diff --git a/README.md b/README.md index 177eb508c..4712405a8 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,19 @@ Once you have created the parameter, you must remove the variable `runners_token Finally, the runner still supports the manual runner creation. No changes are required. Please keep in mind that this setup will be removed in future releases. +### Auto Scaling Group Instance Termination + +The Auto Scaling Group may be configured with a +[lifecycle hook](https://docs.aws.amazon.com/autoscaling/ec2/userguide/lifecycle-hooks.html) +that executes a provided Lambda function when the runner is terminated to +terminate additional instances that were spawned. + +The use of the termination lifecycle hook can be toggled using the +`asg_terminate_lifecycle_hook_create` variable. + +When using this feature, a `builds/` directory containing the packaged Lambda +function is created relative to the root module and will persist. + ### Access runner instance A few option are provided to access the runner instance: @@ -259,25 +272,33 @@ terraform destroy | Name | Source | Version | |------|--------|---------| | [cache](#module\_cache) | ./modules/cache | n/a | +| [terminate\_instances\_lifecycle\_function](#module\_terminate\_instances\_lifecycle\_function) | ./modules/terminate-instances | n/a | ## Resources | Name | Type | |------|------| +| [archive_file.terminate_runner_instances_lambda](https://registry.terraform.io/providers/hashicorp/archive/latest/docs/data-sources/file) | data source | | [aws_autoscaling_group.gitlab_runner_instance](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/autoscaling_group) | resource | +| [aws_autoscaling_lifecycle_hook.terminate_instances](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/autoscaling_lifecycle_hook) | resource | | [aws_autoscaling_schedule.scale_in](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/autoscaling_schedule) | resource | | 
[aws_autoscaling_schedule.scale_out](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/autoscaling_schedule) | resource | +| [aws_cloudwatch_event_rule.terminate_instances](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource | +| [aws_cloudwatch_event_target.terminate_instances](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource | | [aws_cloudwatch_log_group.environment](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | +| [aws_cloudwatch_log_group.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | | [aws_eip.gitlab_runner](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/eip) | resource | | [aws_iam_instance_profile.docker_machine](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_instance_profile) | resource | | [aws_iam_instance_profile.instance](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_instance_profile) | resource | | [aws_iam_policy.eip](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | | [aws_iam_policy.instance_docker_machine_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | | [aws_iam_policy.instance_session_manager_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_policy.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | | [aws_iam_policy.service_linked_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | | 
[aws_iam_policy.ssm](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | | [aws_iam_role.docker_machine](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | | [aws_iam_role.instance](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | | [aws_iam_role_policy.instance](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | | [aws_iam_role_policy_attachment.docker_machine_cache_instance](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_iam_role_policy_attachment.docker_machine_session_manager_aws_managed](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | @@ -286,11 +307,17 @@ terraform destroy | [aws_iam_role_policy_attachment.instance_docker_machine_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_iam_role_policy_attachment.instance_session_manager_aws_managed](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_iam_role_policy_attachment.instance_session_manager_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_iam_role_policy_attachment.service_linked_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | 
[aws_iam_role_policy_attachment.ssm](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_iam_role_policy_attachment.user_defined_policies](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_policy_document.assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | | [aws_kms_alias.default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_alias) | resource | | [aws_kms_key.default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_key) | resource | +| [aws_lambda_function.terminate_runner_instances](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | +| [aws_lambda_permission.current_version_triggers](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource | +| [aws_lambda_permission.unqualified_alias_triggers](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource | | [aws_launch_template.gitlab_runner_instance](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/launch_template) | resource | | [aws_security_group.docker_machine](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | | [aws_security_group.runner](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | @@ -322,6 +349,11 @@ terraform destroy | [arn\_format](#input\_arn\_format) | ARN format to be used. May be changed to support deployment in GovCloud/China regions. 
| `string` | `"arn:aws"` | no | | [asg\_delete\_timeout](#input\_asg\_delete\_timeout) | Timeout when trying to delete the Runner ASG. | `string` | `"10m"` | no | | [asg\_max\_instance\_lifetime](#input\_asg\_max\_instance\_lifetime) | The seconds before an instance is refreshed in the ASG. | `number` | `null` | no | +| [asg\_terminate\_lifecycle\_hook\_create](#input\_asg\_terminate\_lifecycle\_hook\_create) | Boolean toggling the creation of the ASG instance terminate lifecycle hook. | `bool` | `true` | no | +| [asg\_terminate\_lifecycle\_hook\_heartbeat\_timeout](#input\_asg\_terminate\_lifecycle\_hook\_heartbeat\_timeout) | The amount of time, in seconds, for the instances to remain in wait state. | `number` | `90` | no | +| [asg\_terminate\_lifecycle\_hook\_name](#input\_asg\_terminate\_lifecycle\_hook\_name) | Specifies a custom name for the ASG terminate lifecycle hook and related resources. | `string` | `null` | no | +| [asg\_terminate\_lifecycle\_lambda\_memory\_size](#input\_asg\_terminate\_lifecycle\_lambda\_memory\_size) | The memory size in MB to allocate to the terminate-instances Lambda function. | `number` | `128` | no | +| [asg\_terminate\_lifecycle\_lambda\_timeout](#input\_asg\_terminate\_lifecycle\_lambda\_timeout) | Amount of time the terminate-instances Lambda Function has to run in seconds. | `number` | `30` | no | | [aws\_region](#input\_aws\_region) | AWS region. | `string` | n/a | yes | | [cache\_bucket](#input\_cache\_bucket) | Configuration to control the creation of the cache bucket. By default the bucket will be created and used as shared cache. To use the same cache across multiple runners disable the creation of the cache and provide a policy and bucket name. See the public runner example for more details. | `map(any)` |
{
"bucket": "",
"create": true,
"policy": ""
}
| no | | [cache\_bucket\_name\_include\_account\_id](#input\_cache\_bucket\_name\_include\_account\_id) | Boolean to add current account ID to cache bucket name. | `bool` | `true` | no | diff --git a/main.tf b/main.tf index ea9dca7cd..b08043eba 100644 --- a/main.tf +++ b/main.tf @@ -111,7 +111,7 @@ locals { runners_additional_volumes = local.runners_additional_volumes docker_machine_options = length(local.docker_machine_options_string) == 1 ? "" : local.docker_machine_options_string runners_name = var.runners_name - runners_tags = replace(var.overrides["name_docker_machine_runners"] == "" ? format( + runners_tags = replace(replace(var.overrides["name_docker_machine_runners"] == "" ? format( "Name,%s-docker-machine,%s,%s", var.environment, local.tags_string, @@ -121,7 +121,7 @@ locals { local.tags_string, local.runner_tags_string, var.overrides["name_docker_machine_runners"], - ), ",,", ",") + ), ",,", ","), "/,$/", "") runners_token = var.runners_token runners_executor = var.runners_executor runners_limit = var.runners_limit @@ -504,3 +504,24 @@ resource "aws_iam_role_policy_attachment" "eip" { role = aws_iam_role.instance.name policy_arn = aws_iam_policy.eip[0].arn } + +################################################################################ +### Lambda function for ASG instance termination lifecycle hook +################################################################################ +module "terminate_instances_lifecycle_function" { + source = "./modules/terminate-instances" + + count = var.asg_terminate_lifecycle_hook_create ? 1 : 0 + + name = var.asg_terminate_lifecycle_hook_name == null ? 
"terminate-instances" : var.asg_terminate_lifecycle_hook_name + environment = var.environment + asg_arn = aws_autoscaling_group.gitlab_runner_instance.arn + asg_name = aws_autoscaling_group.gitlab_runner_instance.name + cloudwatch_logging_retention_in_days = var.cloudwatch_logging_retention_in_days + lambda_memory_size = var.asg_terminate_lifecycle_lambda_memory_size + lifecycle_heartbeat_timeout = var.asg_terminate_lifecycle_hook_heartbeat_timeout + name_iam_objects = local.name_iam_objects + role_permissions_boundary = var.permissions_boundary == "" ? null : "${var.arn_format}:iam::${data.aws_caller_identity.current.account_id}:policy/${var.permissions_boundary}" + lambda_timeout = var.asg_terminate_lifecycle_lambda_timeout + tags = local.tags +} \ No newline at end of file diff --git a/modules/terminate-instances/README.md b/modules/terminate-instances/README.md new file mode 100644 index 000000000..8b59905d1 --- /dev/null +++ b/modules/terminate-instances/README.md @@ -0,0 +1,158 @@ +# terminate-instances module + +Module for Termination Lifecycle Hook Lambda Function + +This module is used __internally__ by the parent [_terraform\-aws\-gitlab\-runners_](../../README.md) module. + +## Overview + +The Lambda functions evaluates an EC2 instance tag called `gitlab-runner-parent-id`, set in the +[runner config](../../template/runner-config.tpl) by the parent module's +[user data](../../template/gitlab-runner.tpl). Runner instances created by the runner +will have this tag applied with the parent runner's instance ID. When the runner +in the ASG is terminated, the lifecycle hook triggers the Lambda to +terminate spawned runner instances with the matching parent tag and/or any "orphaned" +instances with no running parent runner. + +See [issue #214](https://github.com/npalm/terraform-aws-gitlab-runner/issues/214) for +discussion on the scenario this module addresses. 
+ +## Usage + +### Default Behavior - Package With the Module + +The default behavior of the module is to build and package the Lambda function +when Terraform is run. + +This produces the `.zip` file, built from the source under [`lambda/`](lambda), in a +`builds/` directory relative to the Terraform root module. + +This example shows interacting with this module via the parent module's +input variables: + +```terraform +module "runner" { + source = "npalm/gitlab-runner/aws" + + asg_terminate_lifecycle_hook_create = true + + ... +``` + +### Example + +This example shows using the parent module with the lifecycle hook enabled. + +Note the `asg_terminate_lifecycle_hook_*` variables: + +```terraform +module "runner" { + source = "npalm/gitlab-runner/aws" + + aws_region = "eu-west-1" + environment = "glrunners-dev" + runners_name = "glrunners-foo" + runners_gitlab_url = "https://code.foo.org/" + docker_machine_instance_type = "t3.large" + runners_request_spot_instance = false + runners_machine_autoscaling = var.runners_machine_autoscaling + + vpc_id = data.aws_vpc.current.id + subnet_ids_gitlab_runner = [data.aws_subnet.runner_a.id, data.aws_subnet.runner_b.id] + subnet_id_runners = data.aws_subnet.runner.id + + asg_max_instance_lifetime = 604800 + asg_terminate_lifecycle_hook_create = true + + permissions_boundary = "FooOrg-Permissions-Boundary" + runners_iam_instance_profile_name = "foo-gitlab-runner-profile" + runner_iam_policy_arns = [data.aws_iam_policy.sas_full_rights.arn] + + cache_bucket_prefix = var.environment + cache_shared = true + cache_expiration_days = 90 + + gitlab_runner_registration_config = { + registration_token = aws_ssm_parameter.gitlab_runner_registration_token.value + tag_list = var.runners_tag_list + description = var.runners_description + locked_to_project = "true" + run_untagged = "false" + maximum_timeout = "7200" + } + + # Refer to https://docs.docker.com/machine/drivers/aws/#options + # for 'docker_machine_options' settings with the 
AWS driver + docker_machine_options = var.docker_machine_options + + # See https://github.com/npalm/terraform-aws-gitlab-runner/issues/160 + runners_additional_volumes = ["/certs/client"] + + tags = local.common_tags + +} +``` + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.13 | +| [aws](#requirement\_aws) | >= 2.46 | + +## Providers + +| Name | Version | +|------|---------| +| [archive](#provider\_archive) | 2.2.0 | +| [aws](#provider\_aws) | 3.63.0 | + +## Modules + +No modules. + +## Resources + +| Name | Type | +|------|------| +| [aws_autoscaling_lifecycle_hook.terminate_instances](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/autoscaling_lifecycle_hook) | resource | +| [aws_cloudwatch_event_rule.terminate_instances](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource | +| [aws_cloudwatch_event_target.terminate_instances](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource | +| [aws_cloudwatch_log_group.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | +| [aws_iam_policy.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_role.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role_policy_attachment.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_lambda_function.terminate_runner_instances](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | +| [aws_lambda_permission.current_version_triggers](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource | +| 
[aws_lambda_permission.unqualified_alias_triggers](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource | +| [archive_file.terminate_runner_instances_lambda](https://registry.terraform.io/providers/hashicorp/archive/latest/docs/data-sources/file) | data source | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_iam_policy_document.assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [asg\_arn](#input\_asg\_arn) | The ARN of the Auto Scaling Group to attach to. | `string` | n/a | yes | +| [asg\_name](#input\_asg\_name) | The name of the Auto Scaling Group to attach to. The 'environment' will be prefixed to this. | `string` | n/a | yes | +| [cloudwatch\_logging\_retention\_in\_days](#input\_cloudwatch\_logging\_retention\_in\_days) | The number of days to retain logs in CloudWatch. | `number` | `30` | no | +| [environment](#input\_environment) | A name that identifies the environment, used as a name prefix and for tagging. | `string` | n/a | yes | +| [lambda\_memory\_size](#input\_lambda\_memory\_size) | The memory size in MB to allocate to the Lambda function. | `number` | `128` | no | +| [lambda\_timeout](#input\_lambda\_timeout) | Amount of time the Lambda Function has to run in seconds. | `number` | `10` | no | +| [lifecycle\_heartbeat\_timeout](#input\_lifecycle\_heartbeat\_timeout) | The amount of time, in seconds, for the instances to remain in wait state. | `number` | `90` | no | +| [name](#input\_name) | The name of the Lambda function to create. 
The 'environment' will be prefixed to this. | `string` | n/a | yes | +| [name\_iam\_objects](#input\_name\_iam\_objects) | The name to use for IAM resources - roles and policies. | `string` | `""` | no | +| [role\_permissions\_boundary](#input\_role\_permissions\_boundary) | An optional IAM permissions boundary to use when creating IAM roles. | `string` | `null` | no | +| [tags](#input\_tags) | Map of tags to apply to resources. | `map(any)` | `{}` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [lambda\_function\_arn](#output\_lambda\_function\_arn) | ---------------------------------------------------------------------------- Terminate Instances - Outputs ---------------------------------------------------------------------------- | +| [lambda\_function\_invoke\_arn](#output\_lambda\_function\_invoke\_arn) | n/a | +| [lambda\_function\_name](#output\_lambda\_function\_name) | n/a | +| [lambda\_function\_source\_code\_hash](#output\_lambda\_function\_source\_code\_hash) | n/a | + \ No newline at end of file diff --git a/modules/terminate-instances/cloudwatch.tf b/modules/terminate-instances/cloudwatch.tf new file mode 100644 index 000000000..ddf982c2e --- /dev/null +++ b/modules/terminate-instances/cloudwatch.tf @@ -0,0 +1,33 @@ +# ---------------------------------------------------------------------------- +# Terminate Instances - CloudWatch/EventBridge Resources +# +# This deploys an event rule and target for triggering the provided Lambda +# function from the ASG lifecycle hook. +# ---------------------------------------------------------------------------- +resource "aws_cloudwatch_event_rule" "terminate_instances" { + name = "${var.environment}-${var.name}" + description = "Trigger GitLab runner instance lifecycle hook on termination." + + event_pattern = < 0): + if _other_child['Reservations'][0]['Instances'][0]['State']['Name'] == "terminated": + _terminate_list.append(instance['InstanceId']) + _msg_suffix = "is terminated." 
+ else: + continue + else: + _terminate_list.append(instance['InstanceId']) + _msg_suffix = "does not exist." + except Exception as e: + if 'InvalidInstanceID.NotFound' in str(e): + # The specified parent does not exist + _terminate_list.append(instance['InstanceId']) + _msg_suffix = "does not exist." + else: + # Handle any other exception and move on, skipping this instance. + print(json.dumps({ + "Level": "exception", + "Exception": str(e) + })) + continue + + print(json.dumps({ + "Level": "info", + "InstanceId": instance['InstanceId'], + "Name": _name, + "LaunchTime": str(instance['LaunchTime']), + "Message": f"{instance['InstanceId']} appears to be orphaned. Parent runner {args['parent']} {_msg_suffix}" + })) + + return _terminate_list + +def handler(event, context): + response = [] + event_detail = event['detail'] + client = boto3.client("ec2", region_name=event['region']) + if event_detail['LifecycleTransition'] != "autoscaling:EC2_INSTANCE_TERMINATING": + exit() + + _terminate_list = ec2_list(client=client,parent=event_detail['EC2InstanceId']) + if len(_terminate_list) > 0: + print(json.dumps({ + "Level": "info", + "Message": f"Terminating instances {', '.join(_terminate_list)}" + })) + try: + client.terminate_instances(InstanceIds=_terminate_list, DryRun=False) + return f"Terminated instances {', '.join(_terminate_list)}" + except Exception as e: + print(json.dumps({ + "Level": "exception", + "Exception": str(e) + })) + raise Exception(f"Encountered exception when terminating instances: {str(e)}") + else: + print(json.dumps({ + "Level": "info", + "Message": "No instances to terminate." + })) + return "No instances to terminate." 
+ +if __name__ == "__main__": + handler(None, None) \ No newline at end of file diff --git a/modules/terminate-instances/main.tf b/modules/terminate-instances/main.tf new file mode 100644 index 000000000..f05939961 --- /dev/null +++ b/modules/terminate-instances/main.tf @@ -0,0 +1,58 @@ +# ---------------------------------------------------------------------------- +# Terminate Runner Instances Module +# +# Deploys a Lambda function, CloudWatch rule, and associated resources for +# terminating orphaned runner instances. +# ---------------------------------------------------------------------------- +data "aws_caller_identity" "current" {} + +locals { + source_sha256 = filesha256("${path.module}/lambda/lambda_function.py") +} + +data "archive_file" "terminate_runner_instances_lambda" { + type = "zip" + source_file = "${path.module}/lambda/lambda_function.py" + output_path = "builds/lambda_function_${local.source_sha256}.zip" +} + +resource "aws_lambda_function" "terminate_runner_instances" { + architectures = ["x86_64"] + description = "Lifecycle hook for terminating GitLab runner instances" + filename = data.archive_file.terminate_runner_instances_lambda.output_path + source_code_hash = data.archive_file.terminate_runner_instances_lambda.output_base64sha256 + function_name = "${var.environment}-${var.name}" + handler = "lambda_function.handler" + memory_size = var.lambda_memory_size + package_type = "Zip" + publish = true + role = aws_iam_role.lambda.arn + runtime = "python3.9" + timeout = var.lambda_timeout + tags = var.tags +} + +resource "aws_lambda_permission" "current_version_triggers" { + function_name = aws_lambda_function.terminate_runner_instances.function_name + qualifier = aws_lambda_function.terminate_runner_instances.version + statement_id = "TerminateInstanceEvent" + action = "lambda:InvokeFunction" + principal = "events.amazonaws.com" + source_arn = aws_cloudwatch_event_rule.terminate_instances.arn +} + +resource "aws_lambda_permission" 
"unqualified_alias_triggers" { + function_name = aws_lambda_function.terminate_runner_instances.function_name + statement_id = "TerminateInstanceEvent" + action = "lambda:InvokeFunction" + principal = "events.amazonaws.com" + source_arn = aws_cloudwatch_event_rule.terminate_instances.arn +} + +resource "aws_autoscaling_lifecycle_hook" "terminate_instances" { + name = "${var.environment}-${var.name}" + autoscaling_group_name = var.asg_name + default_result = "CONTINUE" + heartbeat_timeout = var.lifecycle_heartbeat_timeout + lifecycle_transition = "autoscaling:EC2_INSTANCE_TERMINATING" +} \ No newline at end of file diff --git a/modules/terminate-instances/outputs.tf b/modules/terminate-instances/outputs.tf new file mode 100644 index 000000000..7152b1dc7 --- /dev/null +++ b/modules/terminate-instances/outputs.tf @@ -0,0 +1,18 @@ +# ---------------------------------------------------------------------------- +# Terminate Instances - Outputs +# ---------------------------------------------------------------------------- +output "lambda_function_arn" { + value = aws_lambda_function.terminate_runner_instances.arn +} + +output "lambda_function_invoke_arn" { + value = aws_lambda_function.terminate_runner_instances.invoke_arn +} + +output "lambda_function_name" { + value = aws_lambda_function.terminate_runner_instances.function_name +} + +output "lambda_function_source_code_hash" { + value = aws_lambda_function.terminate_runner_instances.source_code_hash +} \ No newline at end of file diff --git a/modules/terminate-instances/variables.tf b/modules/terminate-instances/variables.tf new file mode 100644 index 000000000..dd2889ca1 --- /dev/null +++ b/modules/terminate-instances/variables.tf @@ -0,0 +1,64 @@ +# ---------------------------------------------------------------------------- +# Terminate Instances - Input Variables +# ---------------------------------------------------------------------------- +variable "environment" { + description = "A name that identifies the 
environment, used as a name prefix and for tagging." + type = string +} + +variable "name" { + description = "The name of the Lambda function to create. The 'environment' will be prefixed to this." + type = string +} + +variable "asg_name" { + description = "The name of the Auto Scaling Group to attach to. The 'environment' will be prefixed to this." + type = string +} + +variable "asg_arn" { + description = "The ARN of the Auto Scaling Group to attach to." + type = string +} + +variable "tags" { + description = "Map of tags to apply to resources." + type = map(any) + default = {} +} + +variable "role_permissions_boundary" { + description = "An optional IAM permissions boundary to use when creating IAM roles." + type = string + default = null +} + +variable "cloudwatch_logging_retention_in_days" { + description = "The number of days to retain logs in CloudWatch." + type = number + default = 30 +} + +variable "lifecycle_heartbeat_timeout" { + description = "The amount of time, in seconds, for the instances to remain in wait state." + type = number + default = 90 +} + +variable "name_iam_objects" { + description = "The name to use for IAM resources - roles and policies." + type = string + default = "" +} + +variable "lambda_memory_size" { + description = "The memory size in MB to allocate to the Lambda function." + type = number + default = 128 +} + +variable "lambda_timeout" { + description = "Amount of time the Lambda Function has to run in seconds." 
+ default = 10 + type = number +} \ No newline at end of file diff --git a/template/gitlab-runner.tpl b/template/gitlab-runner.tpl index 78bf9a60f..32999e8a4 100644 --- a/template/gitlab-runner.tpl +++ b/template/gitlab-runner.tpl @@ -1,3 +1,7 @@ +# Provide the parent instance id in the spawned runner tags +PARENT_INSTANCE_ID=$(curl http://169.254.169.254/latest/meta-data/instance-id); +PARENT_TAG="gitlab-runner-parent-id,$${PARENT_INSTANCE_ID}" + mkdir -p /etc/gitlab-runner cat > /etc/gitlab-runner/config.toml <<- EOF @@ -5,6 +9,8 @@ ${runners_config} EOF +sed -i.bak s/__PARENT_TAG__/`echo $PARENT_TAG`/g /etc/gitlab-runner/config.toml + ${pre_install} if [[ `echo ${runners_executor}` == "docker" ]] diff --git a/template/runner-config.tpl b/template/runner-config.tpl index 683087439..5ce4c7579 100644 --- a/template/runner-config.tpl +++ b/template/runner-config.tpl @@ -54,7 +54,7 @@ log_format = "json" "amazonec2-request-spot-instance=${runners_request_spot_instance}", "amazonec2-spot-price=${runners_spot_price_bid}", "amazonec2-security-group=${runners_security_group_name}", - "amazonec2-tags=${runners_tags}", + "amazonec2-tags=${runners_tags},__PARENT_TAG__", "amazonec2-use-ebs-optimized-instance=${runners_ebs_optimized}", "amazonec2-monitoring=${runners_monitoring}", "amazonec2-iam-instance-profile=%{ if runners_iam_instance_profile_name != "" }${runners_iam_instance_profile_name}%{ else }${runners_instance_profile}%{ endif ~}", diff --git a/variables.tf b/variables.tf index 4fd3dc6e4..c1f3af6f1 100644 --- a/variables.tf +++ b/variables.tf @@ -734,3 +734,33 @@ variable "subnet_ids_gitlab_runner" { type = list(string) default = [] } + +variable "asg_terminate_lifecycle_hook_name" { + description = "Specifies a custom name for the ASG terminate lifecycle hook and related resources." + type = string + default = null +} + +variable "asg_terminate_lifecycle_hook_create" { + description = "Boolean toggling the creation of the ASG instance terminate lifecycle hook." 
+ type = bool + default = true +} + +variable "asg_terminate_lifecycle_hook_heartbeat_timeout" { + description = "The amount of time, in seconds, for the instances to remain in wait state." + type = number + default = 90 +} + +variable "asg_terminate_lifecycle_lambda_memory_size" { + description = "The memory size in MB to allocate to the terminate-instances Lambda function." + type = number + default = 128 +} + +variable "asg_terminate_lifecycle_lambda_timeout" { + description = "Amount of time the terminate-instances Lambda Function has to run in seconds." + default = 30 + type = number +}