update consul clusters to use draining (#9)

* renamed healthcheck to sidecar This keeps this module in line with FitnessKeeper/terraform-aws-consul-agents * Changes that allow updates to the cluster to drain correctly This version of the module puts the requirement on the user of the module to set instances to DRAINING in some fashion. In our case we are using our sidecar container, which includes a healthcheck which sets the DRAINING state. * Formatting updates * Changed default for definitions param
asicsdigital · Mar 16, 2018 · 3300f84 · 3300f84
1 parent f162ce7
commit 3300f84
Show file tree

Hide file tree

Showing 5 changed files with 32 additions and 21 deletions.
diff --git a/README.md b/README.md
@@ -38,11 +38,12 @@ This module supports two modes. If you pass a single ECS cluster ID into the `ec
 - `consul_image` - Image to use when deploying consul
 - `consul_memory_reservation` - The soft limit (in MiB) of memory to reserve for the container, (defaults 32)
 - `cluster_size`  - Consul cluster size. This must be 3 or greater, defaults to 3
+- `cloudwatch_log_retention` - Specifies the number of days you want to retain log events in the specified log group. (defaults to 30)
 - `datacenter_name` - Optional override for datacenter name
 - `enable_script_checks` - This controls whether health checks that execute scripts are enabled on this agent, and defaults to false
-- `definitions` - List of Consul Service and Health Check Definitions
-- `healthcheck_image` - Image to use when deploying health check agent, defaults to fitnesskeeper/consul-healthchecks:latest image
-- `healthcheck_memory_reservation` - The soft limit (in MiB) of memory to reserve for the container, defaults 32
+- `definitions` - List of Consul Service and Health Check Definitions (defaults to "ecs-cluster")
+- `sidecar_image` - Image to use when deploying health check agent, defaults to fitnesskeeper/consul-sidecar:latest image
+- `sidecar_memory_reservation` - The soft limit (in MiB) of memory to reserve for the container, defaults 32
 - `oauth2_proxy_htpasswd_file` - Path to the htpasswd file, defaults to /conf/htpasswd
 - `join_ec2_tag_key` - EC2 Tag Key which consul uses to search to generate a list of IPs to join. Defaults to Name
 - `raft_multiplier` - An integer multiplier used by Consul servers to scale key Raft timing parameters (see https://www.consul.io/docs/guides/performance.html), defaults to 5

diff --git a/files/consul.json b/files/consul.json
@@ -8,7 +8,7 @@
             "environment": [
                 {
                     "name": "CONSUL_LOCAL_CONFIG",
-                    "value": "{ \"retry_join\": [\"provider=aws tag_key=${join_ec2_tag_key} tag_value=${join_ec2_tag}\"], \"raft_protocol\": 3, \"skip_leave_on_interrupt\": true, \"enable_script_checks\": ${enable_script_checks}, \"datacenter\":\"${datacenter}\", \"performance\": { \"raft_multiplier\": ${raft_multiplier} }, \"service\": { \"name\": \"InstanceStatus\", \"checks\": [{ \"script\": \"/usr/local/bin/instance-status.sh\", \"interval\": \"30s\" }]}, \"watches\": [ { \"type\": \"service\", \"service\": \"InstanceStatus\", \"handler\": \"/usr/local/bin/instance-status-handler.sh\" } ]}"
+                    "value": "{ \"retry_join\": [\"provider=aws tag_key=${join_ec2_tag_key} tag_value=${join_ec2_tag}\"], \"raft_protocol\": 3, \"skip_leave_on_interrupt\": true, \"enable_script_checks\": ${enable_script_checks}, \"datacenter\":\"${datacenter}\", \"performance\": { \"raft_multiplier\": ${raft_multiplier} }}"
                 },
                 {
                     "name": "CONSUL_BIND_INTERFACE",
@@ -28,7 +28,7 @@
             ],
             "volumesFrom": [
               {
-                "sourceContainer": "consul-healthchecks-${env}"
+                "sourceContainer": "consul-sidecar-${env}"
               }
             ],
             "portMappings": [
@@ -44,11 +44,11 @@
               }
         },
         {
-            "name": "consul-healthchecks-${env}",
-            "image": "${healthcheck_image}",
+            "name": "consul-sidecar-${env}",
+            "image": "${sidecar_image}",
             "essential": true,
             "cpu": 0,
-            "memoryReservation": ${healthcheck_memory_reservation},
+            "memoryReservation": ${sidecar_memory_reservation},
             "environment": [
                 {
                     "name": "CHECKS",

diff --git a/iam.tf b/iam.tf
@@ -6,6 +6,10 @@ data "aws_iam_policy_document" "consul_task_policy" {
       "ec2:DescribeAddresses",
       "ec2:DescribeInstances",
       "ec2:DescribeTags",
+      "cloudwatch:PutMetricData",
+      "ecs:DescribeClusters",
+      "ecs:UpdateContainerInstancesState",
+      "ecs:DescribeContainerInstances",
     ]
 
     resources = ["*"]

diff --git a/main.tf b/main.tf
@@ -21,10 +21,10 @@ data "template_file" "consul" {
     enable_script_checks           = "${var.enable_script_checks ? "true" : "false"}"
     image                          = "${var.consul_image}"
     registrator_image              = "${var.registrator_image}"
-    healthcheck_image              = "${var.healthcheck_image}"
+    sidecar_image                  = "${var.sidecar_image}"
     consul_memory_reservation      = "${var.consul_memory_reservation}"
     registrator_memory_reservation = "${var.registrator_memory_reservation}"
-    healthcheck_memory_reservation = "${var.healthcheck_memory_reservation}"
+    sidecar_memory_reservation     = "${var.sidecar_memory_reservation}"
     join_ec2_tag_key               = "${var.join_ec2_tag_key}"
     join_ec2_tag                   = "${var.join_ec2_tag}"
     awslogs_group                  = "consul-${var.env}"
@@ -62,7 +62,8 @@ resource "aws_ecs_task_definition" "consul" {
 }
 
 resource "aws_cloudwatch_log_group" "consul" {
-  name = "${aws_ecs_task_definition.consul.family}"
+  name              = "${aws_ecs_task_definition.consul.family}"
+  retention_in_days = "${var.cloudwatch_log_retention}"
 
   tags {
     VPC         = "${data.aws_vpc.vpc.tags["Name"]}"
@@ -76,7 +77,7 @@ resource "aws_ecs_service" "consul" {
   name                               = "consul-${var.env}"
   cluster                            = "${var.ecs_cluster_ids[0]}"
   task_definition                    = "${aws_ecs_task_definition.consul.arn}"
-  desired_count                      = "${var.cluster_size * 2}"                      # This is not awesome, it lets new AS groups get added to the cluster before destruction.
+  desired_count                      = "${var.cluster_size}"
   deployment_minimum_healthy_percent = "${var.service_minimum_healthy_percent}"
 
   placement_constraints {
@@ -103,7 +104,7 @@ resource "aws_ecs_service" "consul_primary" {
   name                               = "consul-${var.env}-primary"
   cluster                            = "${var.ecs_cluster_ids[0]}"
   task_definition                    = "${aws_ecs_task_definition.consul.arn}"
-  desired_count                      = "${var.cluster_size * 2 }"                    # This is not awesome, it lets new AS groups get added to the cluster before destruction.
+  desired_count                      = "${var.cluster_size}"
   deployment_minimum_healthy_percent = "${var.service_minimum_healthy_percent}"
 
   placement_constraints {
@@ -130,7 +131,7 @@ resource "aws_ecs_service" "consul_secondary" {
   name                               = "consul-${var.env}-secondary"
   cluster                            = "${var.ecs_cluster_ids[1]}"
   task_definition                    = "${aws_ecs_task_definition.consul.arn}"
-  desired_count                      = "${var.cluster_size * 2 }"                    # This is not awesome, it lets new AS groups get added to the cluster before destruction.
+  desired_count                      = "${var.cluster_size}"
   deployment_minimum_healthy_percent = "${var.service_minimum_healthy_percent}"
 
   placement_constraints {

diff --git a/variables.tf b/variables.tf
@@ -2,6 +2,11 @@ variable "alb_log_bucket" {
   description = "s3 bucket to send ALB Logs"
 }
 
+variable "cloudwatch_log_retention" {
+  default     = "30"
+  description = "Specifies the number of days you want to retain log events in the specified log group. (defaults to 30)"
+}
+
 variable "cluster_size" {
   default     = "3"
   description = "Consul cluster size. This must be greater the 3"
@@ -25,7 +30,7 @@ variable "datacenter_name" {
 variable "definitions" {
   type        = "list"
   description = "List of Consul Service and Health Check Definitions"
-  default     = []
+  default     = ["ecs-cluster"]
 }
 
 variable "dns_zone" {
@@ -49,12 +54,12 @@ variable "hostname" {
   default     = ""
 }
 
-variable "healthcheck_image" {
-  default     = "fitnesskeeper/consul-healthchecks"
-  description = "Image to use when deploying health check agent, defaults to fitnesskeeper/consul-healthchecks:latest image"
+variable "sidecar_image" {
+  default     = "fitnesskeeper/consul-sidecar"
+  description = "Image to use when deploying health check agent, defaults to fitnesskeeper/consul-sidecar:latest image"
 }
 
-variable "healthcheck_memory_reservation" {
+variable "sidecar_memory_reservation" {
   description = "The soft limit (in MiB) of memory to reserve for the container, defaults 32"
   default     = "32"
 }
@@ -104,8 +109,8 @@ variable "registrator_memory_reservation" {
 
 # The below var is pretty much useless until we stop doing the multiple of two thing with number of desired tasks
 variable "service_minimum_healthy_percent" {
-  description = "The minimum healthy percent represents a lower limit on the number of your service's tasks that must remain in the RUNNING state during a deployment (default 90)"
-  default     = "100"
+  description = "The minimum healthy percent represents a lower limit on the number of your service's tasks that must remain in the RUNNING state during a deployment (default 66)"
+  default     = "66"
 }
 
 variable "vpc_id" {}