From b36c1376ee009f3b1ac5003592cfd1185d17e1ed Mon Sep 17 00:00:00 2001
From: "Steven Platt, PhD" <31355889+stevenplatt@users.noreply.github.com>
Date: Wed, 13 Nov 2024 13:20:01 -0500
Subject: [PATCH] feat: Google Cloud Kubernetes cluster + AWS Firewall Rules (#9915)

# Change Log

- **New Google Kubernetes Engine Cluster**
New cluster includes regular and spot instance node pools, as well as the
required firewall rules for TCP and UDP ingress and egress on ports
`40400-40499`.

- **Additional security group for existing AWS Kubernetes Cluster**
Security group created in the cluster VPC to permit TCP and UDP ingress and
egress on ports `40400-40499`.
---
Review notes (ignored by `git am`, not part of the commit message):

- GKE previously opened only UDP on 40400-40499 while the AWS security group
  opens TCP and UDP. TCP ingress/egress firewall rules were added to
  gke-cluster/main.tf (173 -> 203 lines) so both clusters match.
- The post-image blob id on the gke-cluster/main.tf `index` line is carried
  over from the original patch and is stale; regenerate the patch with
  `git format-patch` if a 3-way apply is needed.
- Alignment whitespace on removed/context lines of eks-cluster/main.tf was
  reconstructed from a whitespace-collapsed copy; verify against base blob
  09ef171443f or apply with `--ignore-whitespace`.
- NOTE(review): nothing in this patch references
  aws_security_group.node_traffic from module "eks"; confirm the group is
  attached to the node group elsewhere.

 spartan/terraform/eks-cluster/main.tf      |  72 ++++++--
 spartan/terraform/gke-cluster/main.tf      | 203 +++++++++++++++++++++
 spartan/terraform/gke-cluster/outputs.tf   |  17 ++
 spartan/terraform/gke-cluster/variables.tf |  11 ++
 4 files changed, 291 insertions(+), 12 deletions(-)
 create mode 100644 spartan/terraform/gke-cluster/main.tf
 create mode 100644 spartan/terraform/gke-cluster/outputs.tf
 create mode 100644 spartan/terraform/gke-cluster/variables.tf

diff --git a/spartan/terraform/eks-cluster/main.tf b/spartan/terraform/eks-cluster/main.tf
index 09ef171443f..5dc2fe23511 100644
--- a/spartan/terraform/eks-cluster/main.tf
+++ b/spartan/terraform/eks-cluster/main.tf
@@ -1,8 +1,8 @@
 terraform {
   backend "s3" {
-    bucket         = "aztec-terraform"
-    key            = "spartan/terraform.tfstate"
-    region         = "eu-west-2"
+    bucket = "aztec-terraform"
+    key    = "spartan/terraform.tfstate"
+    region = "eu-west-2"
   }
 
   required_providers {
@@ -26,6 +26,54 @@ data "aws_availability_zones" "available" {
   }
 }
 
+# Create security group for node traffic
+resource "aws_security_group" "node_traffic" {
+  name_prefix = "eks-node-traffic"
+  description = "Security group for EKS node UDP and TCP traffic"
+  vpc_id      = module.vpc.vpc_id
+
+  # Ingress UDP rule
+  ingress {
+    from_port   = 40400
+    to_port     = 40499
+    protocol    = "udp"
+    cidr_blocks = ["0.0.0.0/0"]
+    description = "Allow incoming UDP traffic"
+  }
+
+  # Ingress TCP rule
+  ingress {
+    from_port   = 40400
+    to_port     = 40499
+    protocol    = "tcp"
+    cidr_blocks = ["0.0.0.0/0"]
+    description = "Allow incoming TCP traffic"
+  }
+
+  # Egress UDP rule
+  egress {
+    from_port   = 40400
+    to_port     = 40499
+    protocol    = "udp"
+    cidr_blocks = ["0.0.0.0/0"]
+    description = "Allow outgoing UDP traffic"
+  }
+
+  # Egress TCP rule
+  egress {
+    from_port   = 40400
+    to_port     = 40499
+    protocol    = "tcp"
+    cidr_blocks = ["0.0.0.0/0"]
+    description = "Allow outgoing TCP traffic"
+  }
+
+  tags = {
+    Name    = "${var.cluster_name}-node-traffic"
+    Project = var.cluster_name
+  }
+}
+
 module "vpc" {
   source  = "terraform-aws-modules/vpc/aws"
   version = "5.8.1"
@@ -33,14 +81,14 @@ module "vpc" {
   name = var.cluster_name
   cidr = "10.1.0.0/16"
 
-  azs                  = slice(data.aws_availability_zones.available.names, 0, 3)
-  private_subnets      = ["10.1.1.0/24", "10.1.2.0/24"]
-  public_subnets       = ["10.1.3.0/24", "10.1.4.0/24"]
+  azs             = slice(data.aws_availability_zones.available.names, 0, 3)
+  private_subnets = ["10.1.1.0/24", "10.1.2.0/24"]
+  public_subnets  = ["10.1.3.0/24", "10.1.4.0/24"]
 
   enable_nat_gateway   = true
   single_nat_gateway   = true
   enable_dns_hostnames = true
-  enable_vpn_gateway = true
+  enable_vpn_gateway   = true
 
   public_subnet_tags = {
     "kubernetes.io/role/elb" = 1
@@ -51,7 +99,7 @@ module "vpc" {
   }
 
   tags = {
-    Project     = var.cluster_name
+    Project = var.cluster_name
   }
 }
 
@@ -83,17 +131,17 @@ module "eks" {
 
   eks_managed_node_groups = {
     default = {
-      name = "node-group-1"
+      name           = "node-group-1"
       instance_types = ["m6a.2xlarge"]
 
       min_size     = 1
-      max_size     = 2
-      desired_size = 1
+      max_size     = 10
+      desired_size = 10
     }
   }
 
   tags = {
-    Project     = var.cluster_name
+    Project = var.cluster_name
   }
 }
 
diff --git a/spartan/terraform/gke-cluster/main.tf b/spartan/terraform/gke-cluster/main.tf
new file mode 100644
index 00000000000..46c1a51dc6c
--- /dev/null
+++ b/spartan/terraform/gke-cluster/main.tf
@@ -0,0 +1,203 @@
+terraform {
+  backend "s3" {
+    bucket = "aztec-terraform"
+    key    = "spartan-gke-cluster/terraform.tfstate"
+    region = "eu-west-2"
+  }
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 5.0"
+    }
+  }
+}
+
+# Configure the Google Cloud provider
+provider "google" {
+  project = var.project
+  region  = var.region
+}
+
+# Create the service account
+resource "google_service_account" "gke_sa" {
+  account_id   = "gke-nodes-sa"
+  display_name = "GKE Nodes Service Account"
+  description  = "Service account for GKE nodes"
+}
+
+# Add IAM roles to the service account
+resource "google_project_iam_member" "gke_sa_roles" {
+  for_each = toset([
+    "roles/logging.logWriter",
+    "roles/monitoring.metricWriter",
+    "roles/monitoring.viewer",
+    "roles/artifactregistry.reader"
+  ])
+
+  project = var.project
+  role    = each.key
+  member  = "serviceAccount:${google_service_account.gke_sa.email}"
+}
+
+# Create ingress firewall rule for UDP
+resource "google_compute_firewall" "udp_ingress" {
+  name    = "allow-udp-ingress-40400-40499"
+  network = "default"
+
+  allow {
+    protocol = "udp"
+    ports    = ["40400-40499"]
+  }
+
+  direction     = "INGRESS"
+  source_ranges = ["0.0.0.0/0"]
+  target_tags   = ["gke-node"]
+}
+
+# Create ingress firewall rule for TCP
+resource "google_compute_firewall" "tcp_ingress" {
+  name    = "allow-tcp-ingress-40400-40499"
+  network = "default"
+
+  allow {
+    protocol = "tcp"
+    ports    = ["40400-40499"]
+  }
+
+  direction     = "INGRESS"
+  source_ranges = ["0.0.0.0/0"]
+  target_tags   = ["gke-node"]
+}
+
+# Create egress firewall rule for UDP
+resource "google_compute_firewall" "udp_egress" {
+  name    = "allow-udp-egress-40400-40499"
+  network = "default"
+
+  allow {
+    protocol = "udp"
+    ports    = ["40400-40499"]
+  }
+
+  direction          = "EGRESS"
+  destination_ranges = ["0.0.0.0/0"]
+  target_tags        = ["gke-node"]
+}
+
+# Create egress firewall rule for TCP
+resource "google_compute_firewall" "tcp_egress" {
+  name    = "allow-tcp-egress-40400-40499"
+  network = "default"
+
+  allow {
+    protocol = "tcp"
+    ports    = ["40400-40499"]
+  }
+
+  direction          = "EGRESS"
+  destination_ranges = ["0.0.0.0/0"]
+  target_tags        = ["gke-node"]
+}
+
+# Create a GKE cluster
+resource "google_container_cluster" "primary" {
+  name               = "spartan-gke"
+  location           = var.zone
+  initial_node_count = 1
+
+  # Remove default node pool after cluster creation
+  remove_default_node_pool = true
+
+  # Kubernetes version
+  min_master_version = "latest"
+
+  # Network configuration
+  network    = "default"
+  subnetwork = "default"
+
+  # Master auth configuration
+  master_auth {
+    client_certificate_config {
+      issue_client_certificate = false
+    }
+  }
+}
+
+# Create primary node pool with autoscaling
+resource "google_container_node_pool" "primary_nodes" {
+  name     = "primary-node-pool"
+  location = var.zone
+  cluster  = google_container_cluster.primary.name
+
+  # Enable autoscaling
+  autoscaling {
+    min_node_count = 1
+    max_node_count = 5
+  }
+
+  # Node configuration
+  node_config {
+    machine_type = "t2d-standard-16"
+
+    service_account = google_service_account.gke_sa.email
+    oauth_scopes = [
+      "https://www.googleapis.com/auth/cloud-platform"
+    ]
+
+    labels = {
+      env = "production"
+    }
+
+    tags = ["gke-node"]
+  }
+
+  # Management configuration
+  management {
+    auto_repair  = true
+    auto_upgrade = true
+  }
+}
+
+# Create spot instance node pool with autoscaling
+resource "google_container_node_pool" "spot_nodes" {
+  name     = "spot-node-pool"
+  location = var.zone
+  cluster  = google_container_cluster.primary.name
+
+  # Enable autoscaling
+  autoscaling {
+    min_node_count = 0
+    max_node_count = 10
+  }
+
+  # Node configuration
+  node_config {
+    machine_type = "t2d-standard-16"
+    spot         = true
+
+    service_account = google_service_account.gke_sa.email
+    oauth_scopes = [
+      "https://www.googleapis.com/auth/cloud-platform"
+    ]
+
+    labels = {
+      env  = "production"
+      pool = "spot"
+    }
+
+    tags = ["gke-node", "spot"]
+
+    # Spot instance termination handler
+    taint {
+      key    = "cloud.google.com/gke-spot"
+      value  = "true"
+      effect = "NO_SCHEDULE"
+    }
+  }
+
+  # Management configuration
+  management {
+    auto_repair  = true
+    auto_upgrade = true
+  }
+}
diff --git a/spartan/terraform/gke-cluster/outputs.tf b/spartan/terraform/gke-cluster/outputs.tf
new file mode 100644
index 00000000000..befaa28092e
--- /dev/null
+++ b/spartan/terraform/gke-cluster/outputs.tf
@@ -0,0 +1,17 @@
+output "cluster_endpoint" {
+  value = google_container_cluster.primary.endpoint
+}
+
+output "service_account_email" {
+  value = google_service_account.gke_sa.email
+}
+
+output "region" {
+  description = "Google cloud region"
+  value       = var.region
+}
+
+output "kubernetes_cluster_name" {
+  description = "GKE Cluster Name"
+  value       = google_container_cluster.primary.name
+}
diff --git a/spartan/terraform/gke-cluster/variables.tf b/spartan/terraform/gke-cluster/variables.tf
new file mode 100644
index 00000000000..555458daa5d
--- /dev/null
+++ b/spartan/terraform/gke-cluster/variables.tf
@@ -0,0 +1,11 @@
+variable "project" {
+  default = "testnet-440309"
+}
+
+variable "region" {
+  default = "us-east4"
+}
+
+variable "zone" {
+  default = "us-east4-a"
+}