From 8b5a3789682d0cf2fc88d412ce3f60ea9db81a40 Mon Sep 17 00:00:00 2001 From: toszo Date: Fri, 11 Jan 2019 08:50:41 +0100 Subject: [PATCH] Missing data files added to core folder --- core/data/azure/infrastructure/README.md | 39 + .../epiphany-bld-apps/data.yaml | 1116 ++++++++++++++++ .../epiphany-playground/README.md | 8 + .../epiphany-playground/basic-data.yaml | 25 + .../epiphany-template/data.yaml.j2 | 1149 +++++++++++++++++ core/data/metal/epiphany-lab/data.yaml | 403 ++++++ core/data/vbox/README.md | 7 + core/data/vmware/README.md | 3 + core/data/vmware/epiphany-lab/data.yaml | 403 ++++++ 9 files changed, 3153 insertions(+) create mode 100644 core/data/azure/infrastructure/README.md create mode 100644 core/data/azure/infrastructure/epiphany-bld-apps/data.yaml create mode 100644 core/data/azure/infrastructure/epiphany-playground/README.md create mode 100644 core/data/azure/infrastructure/epiphany-playground/basic-data.yaml create mode 100644 core/data/azure/infrastructure/epiphany-template/data.yaml.j2 create mode 100644 core/data/metal/epiphany-lab/data.yaml create mode 100644 core/data/vbox/README.md create mode 100644 core/data/vmware/README.md create mode 100644 core/data/vmware/epiphany-lab/data.yaml diff --git a/core/data/azure/infrastructure/README.md b/core/data/azure/infrastructure/README.md new file mode 100644 index 0000000000..8c42ad9cd4 --- /dev/null +++ b/core/data/azure/infrastructure/README.md @@ -0,0 +1,39 @@ +# Azure Data + +This folder contains all of the different clusters that need to be generated. This root folder contains all templates `*.tf.j2` in Jinja2 template format. + +Each sub-folder represents the classification of the cluster: + +```text +infrastructure + \chef + \epiphany + \ +``` + +Within the given sub-folder you place the `data.yaml` file that is used to hold all of the data required for the given cluster. It contains data, options, etc. You can use a helper script called `gen_helper.sh` in the root of this folder. 
It calls the `gen_terraform_template.sh` script in the `/bin` folder. It's a helper script that makes it a little easier to use. + +```text +infrastructure + \chef + data.yaml + \epiphany + data.yaml + \ +``` + +## Call EPIPHANY_UP + +This script is a helper script that calls helper scripts based on values passed. + +```text +# Assumes EPIPHANY_UP is in your path but it is most likely in the REPO_ROOT/bin + +infrastructure + \epiphany + ./EPIPHANY_UP $(pwd) ${PWD##*/} +``` + +The above command line only shows two parameters because the third one is optional. The third parameter is the name of the data file, which defaults to `data.yaml`. However, you can override the default for more flexibility. The data and output `*.tf` files *must* reside in the same folder. + +Also, `$(pwd)` means to pass in the current folder path; `${PWD##*/}` means to pass only the name of the current folder; the third parameter is left empty here but could contain the name of the data file. diff --git a/core/data/azure/infrastructure/epiphany-bld-apps/data.yaml b/core/data/azure/infrastructure/epiphany-bld-apps/data.yaml new file mode 100644 index 0000000000..1c4a55f534 --- /dev/null +++ b/core/data/azure/infrastructure/epiphany-bld-apps/data.yaml @@ -0,0 +1,1116 @@ +--- +########################################################## +# Data file for Epiphany build +# Azure specific +########################################################## + +########################### +title: Epiphany Apps Infrastructure... + +kind: datafile +version: 1.0 + +# NOTE: For any data value that is empty, put ""; otherwise the value None will be used in the templates for that attribute. 
+ +core: + # This will apply to a VPN like environment or an air-gapped like environment + bastian: + enable: false + # This host will be set ONLY for environments where a bastian host is supplied to us and NOT part of the cluster build + host: '' + # If the bastian host has a different key + key_path: '' + user: '' + # if key_path is '' + pwd: '' + + build: + # IMPORTANT - will be appended to release name and output folder and part of the template names + version: &version 1.0.0 + # Type of build environment + environment: &env development + # Name of the given release. Version will be appended + release: &release epiphany-apps + # If repo_root is true then add that as a prefix for the output + repo_root: true + platform: azure + output: '/build/$PLATFORM/infrastructure/epiphany' + + tags: &tags + - key: environment + value: *env + - key: version + value: *version + - key: release + value: *release + - key: resourceType + value: epiphany-app-build + - key: location + value: westeurope + + # domain is used to create DNS entries or just add FQDN to hosts.yaml file used in automation + domain: + enable: false + # name is the domain name itself such as epiphanyplatform.io. This value will be appended to the host name for FQDN + name: example.com + create_dns: false + + # These will become the role/classification you will use with the automation to group nodes for given tasks + roles: + - master + - worker + - kafka + - zookeeper + - prometheus + - grafana + - node_exporter + - haproxy_tls_termination + - haproxy_exporter + - elasticsearch + - elasticsearch-curator + - kibana + - filebeat + - jmx-exporter + - kafka-exporter + - postgresql + # !Caution! 
+ # Disable this role if you don't want to restart your servers + - reboot + # These last two must always be present + - linux + # - windows + + admin_user: + name: &admin_username operations + # May want to change to 'key' and create 'key_path' with 'pwd' or 'home' + key_path: ~/.ssh/epiphany-operations/id_rsa + + azure: + tags: + <<: *tags + + terraform: + # This version info is what version is being used at the moment. The version of Terraform in the manifest.yaml in the + # root of the repo is for the initial install and the minimum version + version: 1.6 + service_principal: + # Three files are required for SPs to work, az_ad_sp.json, env.sh and security.yaml. By default, these are created if the + # 'create' attribute is true. If false then you will need to supply those two files. This allows you to create + # a service_principal of your own instead of having one generated. + # You will also need to override env.sh that contains the 'ARM_...' environment variables required. + enable: true + create: true # If you want to use an existing one then set this to false + auth: pwd # Valid values are 'pwd' and 'cert'. At this time Terraform only supports 'pwd' for service principals (sp true) + # NOTE: Backend is a Terraform resource that stores the *.tfstate files used by Terraform to store state. The default + # is to store the state file locally but this can cause issues if working in a team environment. + backend: + # Only used by Terraform + # The backend storage account is '''backend' (combined name with suffix) + # The storage container is generated as ''-'terraform' + # NOTE: Known issue with backend tf when having different VM types below when enabled! So, only one VM entry with count set should be used. Set to false for now... 
+ enable: false + storage_account: + storage_container: + # sleep: 15 # Number of seconds to sleep so that Azure can create the required containers before moving on + type: blob + tags: *tags + + # NOTE: May want to create region/AZ sub-folders to support creating a cluster in different regions and AZ for HA... + resource_group: &resource_group + name: &name epiphany-bld-apps + location: &location West Europe + + # Subscription name + subscription: YOUR-AZURE-SUBSCRIPTION-NAME + + # Azure Active Directory + ad: + name: *name + role: Contributor + + standard: + # One resource group is supported + resource_group: + <<: *resource_group + # exists: false - TO BE REMOVED + prevent_destory: true + # IMPORTANT - Do not set lock if you plan on editing anything in the resource group! Leave it set to false until you are ready + # ReadOnly or CanNotDelete are the only two level options if enabled! + lock: + # NOTE: This will cause locks.tf.wait to be generated. You will need a script to rename this to locks.tf and run apply again + enable: true + name: epiphany-lock + level: ReadOnly + notes: This locks the entire resource group! + tags: *tags + + # This aids in boot diagnostics if there are issues: + # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/boot-diagnostics + debug: + # WIP + enable: false + storage_account: + name: epiapps + account: + tier: Standard + replication_type: LRS + # Storage, StorageV2 and BlobStorage are supported types + kind: StorageV2 + storage_container: + name: debug + + availability: + availability_set: + enable: true + name: ha-epiphany + platform_fault_domain_count: 3 + platform_update_domain_count: 5 + managed: true # Best to keep this 'true' in most cases. 
Mixing availability set managed with vm disk unmanaged is not allowed + tags: *tags + + security: + ssh: &ssh + key: + # Public portion of the key + file: ~/.ssh/epiphany-operations/id_rsa.pub + data: + + vpn: + # Make SURE all of the data is correct below before enabling 'vpn'. It can also take 30 minutes to an hour to create. + enable: false + name: vpn-epi-bld-apps + # Only the RouteBased connection type is currently supported + type: RouteBased + active_active: false + # There are two types of 'sku' (Basic and Standard). Always use Standard so any client can use it. + sku: Standard + # Address space that is required by Virtual Network Gateway. This address space must be inside of virtual_network.address_space and must not overlap with other subnets defined + gateway_subnet_space: 10.1.2.0/24 + client_configuration: + # This section is very important! + # You must specify the address_space that will be allocated to use on the VPN side of the connection that will + # be able to talk to your cluster. + address_space: + - 172.16.1.0/24 + root_certificate: + # name is the name of the cert that was created for you by a trusted party OR a name you give a self-signed cert + name: EpiphanyRootCa + revoked_certificate: + name: SomeRevokedCert + thumbprint: bd0ef7d2c9XDFDFDFE9752169894d2 + # public_cert_data is the actual base64 public key from your cert. Put it in 'as is'. The '|' tells yaml to use 'as is'. 
+ public_cert_data: | + YOUR-BASE64-CLIENT-AUTH-PUBLIC-KEY + + ip_configuration: + name: vpn-ip-config + public_ip: + output: + enable: true + sensitive: false + name: pip-vpn-epiphany + address_allocation: Dynamic + idle_timeout_in_minutes: 30 + + tags: *tags + + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: Dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + network_security_group: + enable: true + # Note: Could create an array of NSGs and reference them name + '_' + # (i.e., epiphany_nsg_001) maybe at somepoint if needed + name: security-nsg-epiphany + tags: *tags + + rules: + - name: ssh + description: Allow SSH + priority: 102 + direction: Inbound + access: Allow + protocol: Tcp + source_port_range: "*" + destination_port_range: "22" + source_address_prefix: "*" + destination_address_prefix: "*" + + virtual_network: + name: epiphany-bld-apps-vnet + address_space: + - 10.1.0.0/22 + + subnet: + name: apps-subnet + address_prefix: 10.1.1.0/24 + # Service endpoints bypass normal public route to Azure SQL, Storage and CosmosDB services + service_endpoints: + - Microsoft.Storage + - Microsoft.Sql + - Microsoft.AzureCosmosDB + + # NOTE: Managed vs Unmanaged storage + # If you want managed storage then by default, 'storage_account' and 'storage_container' options are not required BUT + # they are still enabled in the 'main.tf.j2' template. This could be set with an enable option. + + storage_managed_disk: + # WIP + enable: false + name: epiphany-mdisk + storage_account_type: Premium_LRS + create_option: Empty + disk_size_gb: 500 + count: 1 + + # Once storage account is supported + # Should use (maybe) a different storage account for different locations. Meaning, if you have two clusters (one in east and one in west) then having a storage account for east and one for west would be good since you want storage near compute. 
+ # No `-` in storage account name + + # 3-24 Alphanumeric only lower case + storage_account: + enable: true + name: epibldapps + account: + tier: Standard + replication_type: LRS + # Storage, StorageV2 and BlobStorage are supported types + kind: StorageV2 + + tags: *tags + + storage_container: + enable: false + # 3-63 Alphanumeric lower case plus '-' + name: epiphany-osdisks + access_type: private + + storage_blob: + enable: false + name: epiphany-osdisks + type: page + # size in bytes in 512 increments + size: 5120 + count: 2 + + storage_image_reference: &storage_image_reference + publisher: Canonical + offer: UbuntuServer + sku: 18.04-LTS + # Never put latest on anything! Need to always pin the version number but testing we can get away with it + version: "18.04.201810030" + + #publisher: RedHat + #offer: RHEL + #sku: 7.4 + # Never put latest on anything! Need to always pin the version number but testing we can get away with it + #version: "7.4.2018010506" + + storage_os_disk: &storage_os_disk + managed: true + caching: ReadWrite + create_option: FromImage + disk_size_gb: 30 + managed_disk_type: Premium_LRS + + storage_data_disk: &storage_data_disk + # Determines if a data disk will be added. If also using managed disks then use 'Attach' for create_option instead of 'Empty' + enable: false + count: 2 + managed: false + caching: ReadWrite + create_option: Empty + disk_size_gb: 30 + managed_disk_type: Standard_LRS # Can only be Standard_LRS if using non-managed availability_set + + os_profile: &os_profile + admin_username: operations + admin_password: your-secret-password + + os_profile_linux_config: &os_profile_linux_config + # NOTE: Must set to true except in early stage development! This will enforce ssh key authentication for now... + disable_password_authentication: true + + os_profile_windows_config: &os_profile_windows_config + # NOTE: Must set to true except in early stage development! This will enforce ssh key authentication for now... 
+ disable_password_authentication: true + + # Testing sets flags that allow builds to inject commands into VMs and other related items + testing: + enable: false + + # NOTE: This MUST equal the total number of 'count' metadata from each 'vm' array entry below + vm_count: 1 + + # VM names - 1-15 characters for Windows and 1-64 characters for Linux + vms: + - name: demo-lb + size: Standard_DS1_v2 + os_type: linux + count: 1 + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - linux + - haproxy_tls_termination + - node_exporter + - filebeat + + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled and allowing only given ports, remember that Epiphany needs port 22 for bootstrapping environments + firewall: + enable: false + ports_open: + - 22/tcp + - 443/tcp + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. The ONLY possible + # exception is for automated testing in VSTS + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. 
If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. + inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. 
+ # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: Dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + + tags: *tags + + - name: vm-k8s-master #master + size: Standard_DS2_v2 + os_type: linux + count: 1 + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - linux + - master + - node_exporter + - filebeat + #- reboot + + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled and allowing only given ports, remember that Epiphany needs port 22 for bootstrapping environments + firewall: + enable: false + ports_open: + - 22/tcp + - 443/tcp + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. 
The ONLY possible + # exception is for automated testing in VSTS + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. + inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. 
+ # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: Dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + + tags: *tags + + - name: vm-k8s-node + size: Standard_DS1_v2 + os_type: linux + count: 3 + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - linux + - worker + - node_exporter + - filebeat + - reboot + + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled and allowing only given ports, remember that Epiphany needs port 22 for bootstrapping environments + firewall: + enable: false + ports_open: + - 22/tcp + - 443/tcp + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. 
The ONLY possible + # exception is for automated testing in VSTS + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. + inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. 
+ # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: Dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + + tags: *tags + + - name: vm-kafka + size: Standard_DS2_v2 + os_type: linux + count: 1 + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - linux + - kafka + - kafka-exporter + - zookeeper + - jmx-exporter + - node_exporter + - filebeat + - reboot + + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled and allowing only given ports, remember that Epiphany needs port 22 for bootstrapping environments + firewall: + enable: false + ports_open: + - 22/tcp + - 443/tcp + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. 
The ONLY possible + # exception is for automated testing in VSTS + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. + inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. 
+ # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: Dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + + tags: *tags + + - name: vm-monitoring + size: Standard_DS1_v2 + os_type: linux + count: 1 + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - grafana + - node_exporter + - prometheus + - filebeat + - reboot + - linux + + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled and allowing only given ports, remember that Epiphany needs port 22 for bootstrapping environments + firewall: + enable: false + ports_open: + - 22/tcp + - 443/tcp + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. 
The ONLY possible + # exception is for automated testing in VSTS + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. + inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. 
+ # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: Dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + + tags: *tags + + - name: vm-logging + size: Standard_DS1_v2 + os_type: linux + count: 1 + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - node_exporter + - elasticsearch + - elasticsearch-curator + - kibana + - filebeat + - reboot + - linux + + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled and allowing only given ports, remember that Epiphany needs port 22 for bootstrapping environments + firewall: + enable: false + ports_open: + - 22/tcp + - 443/tcp + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. 
The ONLY possible + # exception is for automated testing in VSTS + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. + inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. 
+ # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: Dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + tags: *tags + + - name: vm-storage + size: Standard_DS1_v2 + os_type: linux + count: 1 + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - node_exporter + - filebeat + - postgresql + - linux + + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled and allowing only given ports, remember that Epiphany needs port 22 for bootstrapping environments + firewall: + enable: false + ports_open: + - 22/tcp + - 443/tcp + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. 
The ONLY possible + # exception is for automated testing in VSTS + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. + inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. 
+ # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: Dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + tags: *tags + + kubernetes: + version: 1.13.0 + storage: + enable: true + #valid chocies: + #azure-file + #WIP + type: azure-file + tags: *tags + quota: 50 + + haproxy: + stats: + enable: true + user: operations + password: your-haproxy-stats-pwd + frontend: + - name: https_front + port: 443 + https: yes + backend: + - http_back1 + backend: + - name: http_back1 + server_groups: + - worker + #servers: + # Definition for server to that hosts the application. + #- name: "node1" + # address: "epiphany-vm1.domain.com" + port: 30104 + + monitoring: + alerts: + enable: False # required value + handlers: + mail: + enable: False # required value + smtp_from: "alert@test.com" + smtp_host: "smtp-url:smtp-port" + smtp_auth_username: "your-smtp-user@domain.com" + smtp_auth_password: "your-smtp-password" + smtp_require_tls: True + recipients: + - recipient1@domain.com + - recipient2@domain.com + slack: + enable: False # required value + api_url: url-to-slack-workspace-api.slack.com + pagerduty: + enable: False # required value + service_key: your-service-key + rules: + - name: "UpDown" + expression: up == 0 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "Node is down." + - name: "DiskSpace" + expression: ((node_filesystem_avail_bytes* 100) / node_filesystem_size_bytes) < 20 # 100 - 80 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... 
+ severity: critical + message: "Disk usage is above 80%" + - name: "DiskSpacePrediction" + expression: predict_linear(node_filesystem_free_bytes{job="node"}[1h], 48 * 3600) < 0 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... + severity: warning + message: "Disk will run out of space in less than 48h" + - name: "MemoryUsage" + expression: (sum by (instance) (node_memory_MemTotal_bytes) - sum by (instance)(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum by (instance)(node_memory_MemTotal_bytes) * 100 > 80 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... + severity: warning + message: "Server memory has been used in more than 80% during last 15 minutes." + - name: "CpuLoad" + expression: 100 - (avg by (instance) (irate(node_cpu_seconds_total{job="node",mode="idle"}[5m])) * 100) > 80 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "CPU utilization has exceeded 80% over last 15 minutes." + - name: "KafkaConsumerLag" + expression: sum by(consumergroup) (kafka_consumergroup_lag) > 1000 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "Kafka consumers are lagging more than 1000 messages over last 15 minutes." + \ No newline at end of file diff --git a/core/data/azure/infrastructure/epiphany-playground/README.md b/core/data/azure/infrastructure/epiphany-playground/README.md new file mode 100644 index 0000000000..e91e0c82ff --- /dev/null +++ b/core/data/azure/infrastructure/epiphany-playground/README.md @@ -0,0 +1,8 @@ +# Simplified + +This environment and its data.yaml will be created using the following command: +``` +./epiphany -a -b -i -f infrastructure/epiphany-playground -t infrastructure/epiphany-template +``` + +If you want to use a simplified file, you can use the `-t` parameter with the path to the folder that contains the `data.yaml.j2` template file. Using `-t` will cause data.yaml to be regenerated.
diff --git a/core/data/azure/infrastructure/epiphany-playground/basic-data.yaml b/core/data/azure/infrastructure/epiphany-playground/basic-data.yaml new file mode 100644 index 0000000000..44829f8cf6 --- /dev/null +++ b/core/data/azure/infrastructure/epiphany-playground/basic-data.yaml @@ -0,0 +1,25 @@ +--- +# Simplified datafile that you can use together with template (see README.md in this folder). +# Change values according to your needs, start with generating ssh keys and placing them in the directory "keys_directory". Do not forget to update "keys_directory" as well. +kind: simplified-datafile +version: 1.0.1 +environment_name: Playground +azure: + subscription_name: YOUR-SUBSCRIPTION-NAME + resource_group: 'playground' + location: 'West Europe' + image_offer: RHEL #UbuntuServer + boot_storage: 'epiplaygrnd1' + image_sku: 7.5 #18.04-LTS + image_publisher: RedHat #Canonical + image_version: 7.5.2018081519 #18.04.201810030 + vm_name_prefix: 'vmplaygrnd' + create_service_principal: true +security: + keys_directory: '/home/your-username/playground' + key_file_name: id_rsa + public_key_file_name: id_rsa.pub +platform: + worker_vms: 3 + kafka_vms: 1 + postgresql_vms: 1 diff --git a/core/data/azure/infrastructure/epiphany-template/data.yaml.j2 b/core/data/azure/infrastructure/epiphany-template/data.yaml.j2 new file mode 100644 index 0000000000..e167d19f97 --- /dev/null +++ b/core/data/azure/infrastructure/epiphany-template/data.yaml.j2 @@ -0,0 +1,1149 @@ +--- +########################################################## +# This file is autogenerated for environment {{ environment_name }} +# Data file for Epiphany +# Azure specific +########################################################## + +########################### +title: Epiphany ({{ azure.image_offer }}) {{ environment_name }} + +kind: datafile +version: 1.0.1 + +# NOTE: Any data values that are empty put "" or the value None will be used in the templates for those attributes. 
+ +core: + # This will apply to a VPN like environment or an air-gapped like environment + bastian: + enable: false + # This host will be set ONLY for environments where a bastian host is supplied to us and NOT part of the cluster build + host: '' + # If the bastian host has a different key + key_path: '' + user: '' + # if key_path is '' + pwd: '' + + build: + # IMPORTANT - will be appended to release name and output folder and part of the template names + version: &version 0.1.21 + # Type of build environment + environment: &env development + # Name of the given release. Version will be appended + release: epiphany-dev + # If repo_root is true then add that as a prefix for the output + repo_root: true + platform: azure + output: '/build/$PLATFORM/infrastructure/epiphany' + + tags: &tags + - key: environment + value: *env + - key: version + value: *version + - key: project + value: epiphany + - key: location + value: {{ azure.location }} + + # domain is used to create DNS entries or just add FQDN to hosts.yaml file used in automation + domain: + enable: false + name: example.com + create_dns: false + + # These will become the role/classification you will use with the automation to group nodes for given tasks + # Use '_' and not '-' in names + roles: + - master + - worker + - kafka + - zookeeper + - grafana + - node_exporter + - prometheus + - elasticsearch + - elasticsearch-curator + - kibana + - filebeat + - jmx-exporter + - kafka-exporter + - haproxy_tls_termination + - haproxy_exporter + - postgresql + - reboot + # These last two must always be present + - linux + # - windows + + admin_user: + name: &admin_username operations + # May want to change to 'key' and create 'key_path' with 'pwd' or 'home' + key_path: {{ security.keys_directory }}/{{ security.key_file_name }} + + azure: + tags: + <<: *tags + + terraform: + # This version info is what version is being used at the moment. 
The version of Terraform in the manifest.yaml in the + # root of the repo is for the initial install and the minimum version + version: 1.6 + service_principal: + # Three files are required for SPs to work, az_ad_sp.json, env.sh and security.yaml. By default, these are created if the + # 'create' attribute is true. If false then you will need to supply those two files. This allows you to create + # a service_principal of your own instead of having one generated. + # You will also need to override env.sh that contains the 'ARM_...' environment variables required. + enable: true + create: {{ azure.create_service_principal }} # If you want to use an existing one then set this to false + auth: pwd # Valid values are 'pwd' and 'cert'. At this time Terraform only supports 'pwd' for service principals (sp true) + # NOTE: Backend is a Terraform resource that stores the *.tfstate files used by Terraform to store state. The default + # is to store the state file locally but this can cause issues if working in a team environment. + backend: + # Only used by Terraform + # The backend storage account is '''backend' (combined name with suffix) + # The storage container is generated as ''-'terraform' + # NOTE: Known issue with backend tf when having different VM types below when enabled! So, only one VM entry with count set should be used. Set to false for now... + enable: false + storage_account: + storage_container: + # sleep: 15 # Number of seconds to sleep so that Azure can create the required containers before moving on + type: blob + tags: *tags + + # NOTE: May want to create region/AZ sub-folders to support creating a cluster in different regions and AZ for HA...
+ resource_group: &resource_group + name: &name {{ azure.resource_group }} + location: &location {{ azure.location }} + + # Subscription name or ID + subscription: {{ azure.subscription_name }} + + # Azure Active Directory + ad: + name: *name + role: Contributor + + standard: + # One resource group is supported + resource_group: + <<: *resource_group + # exists: false - TO BE REMOVED + prevent_destory: true + # IMPORTANT - Do not set lock if you plan on editing anything in the resource group! Leave it set to false until you are ready + # ReadOnly or CanNotDelete are the only two level options if enabled! + lock: + # NOTE: This will cause locks.tf.wait to be generated. You will need a script to rename this to locks.tf and run apply again + enable: true + name: epiphany-lock + level: ReadOnly + notes: This locks the entire resource group! + tags: *tags + + # This aids in boot diagnostics if there are issues: + # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/boot-diagnostics + debug: + # WIP + enable: false + storage_account: + name: {{ azure.boot_storage }} + account: + tier: Standard + replication_type: LRS + # Storage, StorageV2 and BlobStorage are supported types + kind: StorageV2 + storage_container: + name: debug + + availability: + availability_set: + enable: true + name: ha-epiphany + platform_fault_domain_count: 3 + platform_update_domain_count: 5 + managed: true # Best to keep this 'true' in most cases. Mixing availability set managed with vm disk unmanaged is not allowed + tags: *tags + + security: + ssh: &ssh + key: + # Public portion of the key + file: {{ security.keys_directory }}/{{ security.public_key_file_name }} + data: + + vpn: + # Make SURE all of the data is correct below before enabling 'vpn'. It can also take 30 minutes to an hour to create. + enable: false + name: vpn-epiphany + # Only support RouteBased type of connection currently + type: RouteBased + active_active: false + # There are two types of 'sku' (Basic and Standard). 
Always use Standard so any client can use it. + # Address space that is required by Virtual Network Gateway. This address space must be inside of virtual_network.address_space and must not overlap with other subnets defined + gateway_subnet_space: 10.0.3.0/24 + client_configuration: + # This section is very important! + # You must specify the address_space that will be allocated to use on the VPN side of the connection that will + # be able to talk to your cluster. + address_space: + - 172.16.0.0/24 + root_certificate: + # name is the name of the cert that was created for you by a trusted party OR a name you give a self-signed cert + name: EpiphanyRootCa + revoked_certificate: + name: EpiphanyRevoked + thumbprint: your-revoked-cert-thumbprint + # public_cert_data is the actual base64 public key from your cert. Put it in 'as is'. The '|' tells yaml to use 'as is'. + public_cert_data: | + YOUR-BASE64-CLIENT-AUTH-PUBLIC-KEY + + ip_configuration: + name: vpn-ip-config + public_ip: + output: + enable: true + sensitive: false + name: pip-vpn-epiphany + address_allocation: dynamic + idle_timeout_in_minutes: 30 + + tags: *tags + + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + network_security_group: + enable: true + # Note: Could create an array of NSGs and reference them name + '_' + # (i.e., epiphany_nsg_001) maybe at some point if needed + name: security-nsg-epiphany + tags: *tags + + # Add more NSG rules in order to access resources.
+ rules: + - name: ssh + description: Allow SSH + priority: 101 + direction: Inbound + access: Allow + protocol: Tcp + source_port_range: "*" + destination_port_range: "22" + source_address_prefix: "*" + destination_address_prefix: "*" + + virtual_network: + name: security-vnet-epiphany + address_space: + - 10.0.0.0/8 + + subnet: + name: vnet-subnet-epiphany + address_prefix: 10.0.0.0/16 + # Service endpoints bypass normal public route to Azure SQL, Storage and CosmosDB services + service_endpoints: + - Microsoft.Storage + - Microsoft.Sql + - Microsoft.AzureCosmosDB + + # NOTE: Managed vs Unmanaged storage + # If you want managed storage then by default, 'storage_account' and 'storage_container' options are not required BUT + # they are still enabled in the 'main.tf.j2' template. This could be set with an enable option. + + storage_managed_disk: + # WIP + enable: false + name: epiphany-mdisk + storage_account_type: Premium_LRS + create_option: Empty + disk_size_gb: 500 + count: 1 + + # Once storage account is supported + # Should use (maybe) a different storage account for different locations. Meaning, if you have two clusters (one in east and one in west) then having a storage account for east and one for west would be good since you want storage near compute. + # No `-` in storage account name + + # 3-24 Alphanumeric only lower case + storage_account: + enable: false + name: epiphany + account: + tier: Standard + replication_type: LRS + # Storage, StorageV2 and BlobStorage are supported types + kind: StorageV2 + + tags: *tags + + storage_container: + enable: false + # 3-63 Alphanumeric lower case plus '-' + name: epiphany-osdisks + access_type: private + + storage_blob: + enable: false + name: epiphany-osdisks + type: page + # size in bytes in 512 increments + size: 5120 + count: 2 + + storage_image_reference: &storage_image_reference + # publisher: Canonical + # offer: UbuntuServer + # sku: 16.04-LTS + # Never put latest on anything! 
Need to always pin the version number but testing we can get away with it + # version: latest + + publisher: {{ azure.image_publisher }} + offer: {{ azure.image_offer }} + sku: {{ azure.image_sku }} + # Never put latest on anything! Need to always pin the version number but testing we can get away with it + version: {{ azure.image_version }} + + storage_os_disk: &storage_os_disk + managed: false + caching: ReadWrite + create_option: FromImage + disk_size_gb: 30 + managed_disk_type: Premium_LRS + + storage_data_disk: &storage_data_disk + # Determines if a data disk will be added. If also using managed disks then use 'Attach' for create_option instead of 'Empty' + enable: false + count: 2 + managed: false + caching: ReadWrite + create_option: Empty + disk_size_gb: 30 + managed_disk_type: Standard_LRS # Can only be Standard_LRS if using non-managed availability_set + + os_profile: &os_profile + admin_username: operations + admin_password: YourPwd + + os_profile_linux_config: &os_profile_linux_config + # NOTE: Must set to true except in early stage development! This will enforce ssh key authentication for now... + disable_password_authentication: true + + os_profile_windows_config: &os_profile_windows_config + # NOTE: Must set to true except in early stage development! This will enforce ssh key authentication for now... + disable_password_authentication: true + + # Testing sets flags that allow builds to inject commands into VMs and other related items + testing: + enable: false + + # NOTE: This MUST equal the total number of 'count' metadata from each 'vm' array entry below + vm_count: 8 + + # VM names - 1-15 characters for Windows and 1-64 characters for Linux + vms: + + vms: + - name: {{ azure.vm_name_prefix }}-lb + size: Standard_DS1_v2 + os_type: linux + count: 1 + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. 
These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - linux + - haproxy_tls_termination + - haproxy_exporter + - node_exporter + - filebeat + + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled and allowing only given ports, remember that Epiphany needs port 22 for bootstrapping environments + firewall: + enable: false + ports_open: + - 22/tcp + - 443/tcp + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. The ONLY possible + # exception is for automated testing in VSTS + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. 
+ inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. + # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: Dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + + tags: *tags + + - name: {{ azure.vm_name_prefix }}-master + size: Standard_DS2_v2 + os_type: linux + count: 1 + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. 
These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - linux + - master + - filebeat + - node_exporter + - reboot + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled or not allowing the given ports + firewall: + enable: false + # These ports can be in sync with NSG rules below or not. Meaning, you may have NSG disabled and want each node to block + # Each node could belong to a different grouping with a need for different ports + ports_open: + - 443 + - 22 + - 6443 + - 2379-2380 + - 10250 + - 10251 + - 10252 + - 10255 + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. The ONLY possible + # exception is for automated testing in VSTS (maybe) + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. 
+ inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. + # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + tags: *tags + + - name: {{ azure.vm_name_prefix }}-node + size: Standard_DS1_v2 + os_type: linux + count: {{ platform.worker_vms }} + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. 
These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - linux + - worker + - filebeat + - node_exporter + - reboot + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled or not allowing the given ports + firewall: + enable: false + # These ports can be in sync with NSG rules below or not. Meaning, you may have NSG disabled and want each node to block + # Each node could belong to a different grouping with a need for different ports + ports_open: + - 443 + - 22 + - 6443 + - 2379-2380 + - 10250 + - 10251 + - 10252 + - 10255 + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. The ONLY possible + # exception is for automated testing in VSTS (maybe) + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. 
+ inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. + # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + tags: *tags + + - name: {{ azure.vm_name_prefix }}-prometheus + size: Standard_DS1_v2 + os_type: linux + count: 1 + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. 
These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - linux + - prometheus + - grafana + - node_exporter + - filebeat + - reboot + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled or not allowing the given ports + firewall: + enable: false + # These ports can be in sync with NSG rules below or not. Meaning, you may have NSG disabled and want each node to block + # Each node could belong to a different grouping with a need for different ports + ports_open: + - 443 + - 22 + - 6443 + - 2379-2380 + - 10250 + - 10251 + - 10252 + - 10255 + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. The ONLY possible + # exception is for automated testing in VSTS (maybe) + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. 
+ inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. + # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + tags: *tags + + - name: {{ azure.vm_name_prefix }}-kafka + size: Standard_DS2_v2 + os_type: linux + count: {{ platform.kafka_vms }} + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. 
These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - linux + - jmx-exporter + - zookeeper + - kafka + - node_exporter + - filebeat + - kafka-exporter + - reboot + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled or not allowing the given ports + firewall: + enable: false + # These ports can be in sync with NSG rules below or not. Meaning, you may have NSG disabled and want each node to block + # Each node could belong to a different grouping with a need for different ports + ports_open: + - 443 + - 22 + - 6443 + - 2379-2380 + - 10250 + - 10251 + - 10252 + - 10255 + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. The ONLY possible + # exception is for automated testing in VSTS (maybe) + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. 
+ inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. + # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + tags: *tags + + - name: {{ azure.vm_name_prefix }}-elk + size: Standard_DS1_v2 + os_type: linux + count: 1 + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. 
These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - linux + - node_exporter + - elasticsearch + - elasticsearch-curator + - kibana + - filebeat + + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled or not allowing the given ports + firewall: + enable: false + # These ports can be in sync with NSG rules below or not. Meaning, you may have NSG disabled and want each node to block + # Each node could belong to a different grouping with a need for different ports + ports_open: + - 443 + - 22 + - 6443 + - 2379-2380 + - 10250 + - 10251 + - 10252 + - 10255 + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. The ONLY possible + # exception is for automated testing in VSTS (maybe) + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. 
+ inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. 
+ # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + tags: *tags + +{% if platform.postgresql_vms is defined and platform.postgresql_vms > 0 %} + - name: {{ azure.vm_name_prefix }}-postgresql + size: Standard_DS1_v2 + os_type: linux + count: {{ platform.postgresql_vms }} + # One host will need to be a bastian host UNLESS the bastian host is provided in another way + bastian_host: false + # roles are how you define a grouping of nodes. These values will be used to create an inventory of your cluster + # Must be a member of the 'role' in core + roles: + - node_exporter + - filebeat + - postgresql + - linux + + delete_os_disk_on_termination: false + delete_data_disks_on_termination: false + # NOTE: This must match the interfaces public_ip options. If one interface has enabled public_ip then this MUST be true else false + public_ips: true + depends_on: + # Put the name of the resource. If a VM then only put the 'name:' value. The count portion will be automatically added + - '${var.master_depends_on}' + tags: *tags + + security: + # Each node will have firewalld (redhat) enabled and allowing only given ports, remember that Epiphany needs port 22 for bootstrapping environments + firewall: + enable: false + ports_open: + - 22/tcp + - 443/tcp + + # NOTE: For Kubernetes builds we should *not* use provisioners to bootstrap any data. 
The point is to build a common + # infrastructure and then collect IPs etc. that are required for K8s automation to build. The ONLY possible + # exception is for automated testing in VSTS + bootstrap: + # Can use the global values or override them on a per vm type basis + ssh: *ssh + + provisioners: + file: + enable: false + # NOTE: Pay close attention to the trailing '/' for 'source'. If you leave '/' off of the end of source then the directory tree will be copied to 'destination'. If you add '/' then the contents of that directory will be copied into the destination directory. + source: "../../../../core/src/scripts/kubernetes/linux" + destination: "/home/${var.admin_username}" + + remote_exec: + enable: false + # NOTE: Use Terraform vars if you need to embed variables in the commands. + inline: # Example is below and only here to show + - 'chmod +x ${var.file_destination}/linux/make-executable.sh' + - '${var.file_destination}/linux/make-executable.sh' + - 'echo ${var.admin_password} | sudo -S ${var.file_destination}/linux/prepare-system-kubernetes-redhat.sh' + + testing_inline: + - 'echo \"##vso[task.setvariable variable=toPrint]Print Me Please\"' + + interfaces: + # A VM can have multiple IP addresses and interfaces. In this case we are + # saying there is one public IP per network interface + - network_interface: + # name has vm name appended to it so only put basic type name here + name: nic + primary: true + tags: *tags + + # This only works with certain size VMs + # Accelerated Networking is supported on most general purpose and compute-optimized instance sizes with 2 or more vCPUs. + # These supported series are: D/DSv2 and F/Fs. On instances that support hyperthreading, Accelerated Networking is supported + # on VM instances with 4 or more vCPUs. Supported series are: D/DSv3, E/ESv3, Fsv2, and Ms/Mms. 
+ # https://docs.microsoft.com/en-us/azure/virtual-network/create-vm-accelerated-networking-cli + enable_accelerated_networking: false + + ip_configuration: + name: ip-config + # subnet_id is generated so use terraform variable + private_ip: + output: + enable: true + sensitive: false + address_allocation: Dynamic + # address only applies if 'address_allocation' is 'static' + address: 10.0.2.5 + + public_ip: + # If you set to false there must be some host set as a bastian host + enable: true + output: + enable: true + sensitive: false + name: pip-epiphany + address_allocation: static + idle_timeout_in_minutes: 30 + sku: Standard + tags: *tags +{% endif %} + + kubernetes: + version: 1.13.0 + storage: + enable: true + #valid choices: + #azure-file + #WIP + type: azure-file + tags: *tags + quota: 50 + + haproxy: + stats: + enable: true + user: operations + password: your-haproxy-stats-pwd + frontend: + - name: https_front + port: 443 + https: yes + backend: + - http_back1 + backend: + - name: http_back1 + server_groups: + - worker + #servers: + # Definition for the server that hosts the application. + #- name: "node1" + # address: "epiphany-vm1.domain.com" + port: 30104 + + monitoring: + alerts: + enable: False # required value + handlers: + mail: + enable: False # required value + smtp_from: "alert@test.com" + smtp_host: "smtp-url:smtp-port" + smtp_auth_username: "your-smtp-user@domain.com" + smtp_auth_password: "your-smtp-password" + smtp_require_tls: True + recipients: + - recipient1@domain.com + - recipient2@domain.com + slack: + enable: False # required value + api_url: url-to-slack-workspace-api.slack.com + pagerduty: + enable: False # required value + service_key: your-service-key + rules: + - name: "UpDown" + expression: up == 0 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "Node is down." 
+ - name: "DiskSpace" + expression: ((node_filesystem_avail_bytes* 100) / node_filesystem_size_bytes) < 20 # 100 - 80 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "Disk usage is above 80%" + - name: "DiskSpacePrediction" + expression: predict_linear(node_filesystem_free_bytes{job="node"}[1h], 48 * 3600) < 0 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... + severity: warning + message: "Disk will run out of space in less than 48h" + - name: "MemoryUsage" + expression: (sum by (instance) (node_memory_MemTotal_bytes) - sum by (instance)(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum by (instance)(node_memory_MemTotal_bytes) * 100 > 80 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... + severity: warning + message: "Server memory has been used in more than 80% during last 15 minutes." + - name: "CpuLoad" + expression: 100 - (avg by (instance) (irate(node_cpu_seconds_total{job="node",mode="idle"}[5m])) * 100) > 80 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "CPU utilization has exceeded 80% over last 15 minutes." + - name: "KafkaConsumerLag" + expression: sum by(consumergroup) (kafka_consumergroup_lag) > 1000 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "Kafka consumers are lagging more than 1000 messages over last 15 minutes." \ No newline at end of file diff --git a/core/data/metal/epiphany-lab/data.yaml b/core/data/metal/epiphany-lab/data.yaml new file mode 100644 index 0000000000..73608a0f90 --- /dev/null +++ b/core/data/metal/epiphany-lab/data.yaml @@ -0,0 +1,403 @@ +# Primary manifest data file used to build out the given cluster. +# In on-premise, these values are given to us and we create this file. 
+# Any platform specific items that may be needed will be added to this common manifest + + +kind: datafile +version: 1.0.1 + +# This will apply to a VPN like environment or an air-gapped like environment +bastian: + enable: False + # This host will be set ONLY for environments where a bastian host is supplied to us + host: + +build: + version: &version 1.0.1 + # Type of build environment + environment: &env development + # Name of the given release. Version will be appended + release: &release epiphany-metal + # If repo_root is true then add that as a prefix for the output + repo_root: True + # Should only be one of these: azure, aws, metal, vbox, vmware + platform: metal + output: /build/$PLATFORM/infrastructure/epiphany + +ansible_tags: + - key: environment + value: development + - key: version + value: 1.0.1 + - key: release + value: epiphany-lab + - key: resourceType + value: epiphany-lab-builds + +# The primary 'operations' user and the ssh key path +admin_user: + name: operations + key_path: ~/.ssh/epiphany-key/id_rsa + +# This is the master set of available roles +ansible_roles: + - master + - worker + - kafka + - zookeeper + - grafana + - node_exporter + - prometheus + - elasticsearch + - elasticsearch-curator + - kibana + - filebeat + - jmx-exporter + - kafka-exporter + - haproxy_tls_termination + - haproxy_exporter + - reboot + # These last two must always be present + - linux + # - windows + +domain: + enable: False + name: example.com + +# Nodes in the cluster along with metadata and the groups they belong to +nodes: + - name: k8s-master + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. 
+ # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - master + - node_exporter + - filebeat + - prometheus + - grafana + - elasticsearch + - elasticsearch-curator + - kibana + - haproxy_tls_termination + - reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab1 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.4 + private: 192.168.1.4 + + - name: k8s-worker1 + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. + # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - worker + - node_exporter + - filebeat + - reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab2 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.15 + private: 192.168.1.15 + + - name: k8s-worker2 + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. 
+ # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - worker + - node_exporter + - filebeat + - reboot + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab3 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.31 + private: 192.168.1.31 + + - name: monitoring + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. + # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - node_exporter + - filebeat + - prometheus + - grafana + - reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab4 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.6 + private: 192.168.1.6 + + - name: centralized-log + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. 
+ # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - node_exporter + - filebeat + - elasticsearch + - elasticsearch-curator + - kibana + - reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab5 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.7 + private: 192.168.1.7 + + - name: kafka1 + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. + # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - kafka + - zookeeper + - jmx-exporter + - node_exporter + - kafka-exporter + - filebeat + - reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab6 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.32 + private: 192.168.1.32 + + - name: kafka2 + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. 
+ # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - kafka + - zookeeper + - jmx-exporter + - node_exporter + - kafka-exporter + - filebeat + - reboot + #- reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab7 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.63 + private: 192.168.1.63 + + - name: load-balancer + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. + # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - haproxy_tls_termination + - haproxy_exporter + - node_exporter + - reboot + #- reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab8 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.66 + private: 192.168.1.66 + +kubernetes: + version: 1.13.0 + storage: + enable: False + +haproxy: + stats: + enable: true + user: operations + password: your-stats-pwd + frontend: + - name: https_front + port: 443 + https: yes + backend: + - http_back1 + backend: + - name: http_back1 + server_groups: + - worker + #servers: + # Definition for server to that hosts the application. 
+ #- name: "node1" + # address: "epiphany-lab1.domain.com" + port: 30001 + +monitoring: + alerts: + enable: False # required value + handlers: + mail: + enable: False # required value + smtp_from: "alert@test.com" + smtp_host: "smtp-url:smtp-port" + smtp_auth_username: "your-smtp-user@domain.com" + smtp_auth_password: "your-smtp-password" + smtp_require_tls: True + recipients: + - recipient1@domain.com + - recipient2@domain.com + slack: + enable: False # required value + api_url: url-to-slack-workspace-api.slack.com + pagerduty: + enable: False # required value + service_key: your-service-key + rules: + - name: "UpDown" + expression: up == 0 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "Node is down." + - name: "DiskSpace" + expression: ((node_filesystem_avail_bytes* 100) / node_filesystem_size_bytes) < 20 # 100 - 80 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "Disk usage is above 80%" + - name: "DiskSpacePrediction" + expression: predict_linear(node_filesystem_free_bytes{job="node"}[1h], 48 * 3600) < 0 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... + severity: warning + message: "Disk will run out of space in less than 48h" + - name: "MemoryUsage" + expression: (sum by (instance) (node_memory_MemTotal_bytes) - sum by (instance)(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum by (instance)(node_memory_MemTotal_bytes) * 100 > 80 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... + severity: warning + message: "Server memory has been used in more than 80% during last 15 minutes." + - name: "CpuLoad" + expression: 100 - (avg by (instance) (irate(node_cpu_seconds_total{job="node",mode="idle"}[5m])) * 100) > 80 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "CPU utilization has exceeded 80% over last 15 minutes." + - name: "KafkaConsumerLag" + expression: sum by(consumergroup) (kafka_consumergroup_lag) > 1000 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... 
+ severity: critical + message: "Kafka consumers are lagging more than 1000 messages over last 15 minutes." + \ No newline at end of file diff --git a/core/data/vbox/README.md b/core/data/vbox/README.md new file mode 100644 index 0000000000..77b3597f5f --- /dev/null +++ b/core/data/vbox/README.md @@ -0,0 +1,7 @@ +# Virtual Box + +## Laptop Version + +This version is for building and running on a single node such as a laptop. It will build out a Virtual Box environment on any laptop or single workstation and then lay Epiphany down on that environment, which allows for completely isolated running with no Internet connectivity. + +The initial build will require Internet access to pull down the secure artifacts to build out the environment. diff --git a/core/data/vmware/README.md b/core/data/vmware/README.md new file mode 100644 index 0000000000..5c788e2059 --- /dev/null +++ b/core/data/vmware/README.md @@ -0,0 +1,3 @@ +# VMWare + +The VMWare version builds out any required environmental artifacts specific to VMWare but in most cases the customer will provide the required information such as IPs, gateways, etc. diff --git a/core/data/vmware/epiphany-lab/data.yaml b/core/data/vmware/epiphany-lab/data.yaml new file mode 100644 index 0000000000..099dc635cb --- /dev/null +++ b/core/data/vmware/epiphany-lab/data.yaml @@ -0,0 +1,403 @@ +# Primary manifest data file used to build out the given cluster. +# In on-premise, these values are given to us and we create this file. +# Any platform specific items that may be needed will be added to this common manifest + + +kind: datafile +version: 1.0.1 + +# This will apply to a VPN like environment or an air-gapped like environment +bastian: + enable: False + # This host will be set ONLY for environments where a bastian host is supplied to us + host: + +build: + version: &version 1.0.1 + # Type of build environment + environment: &env development + # Name of the given release. 
Version will be appended + release: &release epiphany-vmware + # If repo_root is true then add that as a prefix for the output + repo_root: True + # Should only be one of these: azure, aws, metal, vbox, vmware + platform: vmware + output: /build/$PLATFORM/infrastructure/epiphany + +ansible_tags: + - key: environment + value: development + - key: version + value: 1.0.1 + - key: release + value: epiphany-lab + - key: resourceType + value: epiphany-lab-builds + +# The primary 'operations' user and the ssh key path +admin_user: + name: operations + key_path: ~/.ssh/epiphany-key/id_rsa + +# This is the master set of available roles +ansible_roles: + - master + - worker + - kafka + - zookeeper + - grafana + - node_exporter + - prometheus + - elasticsearch + - elasticsearch-curator + - kibana + - filebeat + - jmx-exporter + - kafka-exporter + - haproxy_tls_termination + - haproxy_exporter + - reboot + # These last two must always be present + - linux + # - windows + +domain: + enable: False + name: example.com + +# Nodes in the cluster along with metadata and the groups they belong to +nodes: + - name: k8s-master + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed.
+ # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - master + - node_exporter + - filebeat + - prometheus + - grafana + - elasticsearch + - elasticsearch-curator + - kibana + - haproxy_tls_termination + - reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab1 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.4 + private: 192.168.1.4 + + - name: k8s-worker1 + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. + # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - worker + - node_exporter + - filebeat + - reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab2 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.15 + private: 192.168.1.15 + + - name: k8s-worker2 + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. 
+ # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - worker + - node_exporter + - filebeat + - reboot + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab3 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.31 + private: 192.168.1.31 + + - name: monitoring + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. + # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - node_exporter + - filebeat + - prometheus + - grafana + - reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab4 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.6 + private: 192.168.1.6 + + - name: centralized-log + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. 
+ # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - node_exporter + - filebeat + - elasticsearch + - elasticsearch-curator + - kibana + - reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab5 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.7 + private: 192.168.1.7 + + - name: kafka1 + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. + # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - kafka + - zookeeper + - jmx-exporter + - node_exporter + - kafka-exporter + - filebeat + - reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab6 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.32 + private: 192.168.1.32 + + - name: kafka2 + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. 
+ # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - kafka + - zookeeper + - jmx-exporter + - node_exporter + - kafka-exporter + - filebeat + - reboot + #- reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab7 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.63 + private: 192.168.1.63 + + - name: load-balancer + # Mainly used for Azure because Terraform is being used and has the ability to use count! + count: 1 + # If enabled then the first host in a 'count' pattern will be used which will end with '-001' + bastian_host: False + # Represents if there are public IPs. If not then a VPN connection is assumed. + # `public` could also mean exposed to a larger network in the case of on-premise with metal or vmware + public_ips: True + # The roles that this given node belongs to + ansible_roles: + - linux + - haproxy_tls_termination + - haproxy_exporter + - node_exporter + - reboot + #- reboot + + security: + firewall: + enable: false + ports: + + # hosts the individual host names and IPs based on count above + hosts: + - name: epiphany-lab8 + # May want to address multiple nics and bonding options later + ips: + # Only takes the first public ip + public: 192.168.1.66 + private: 192.168.1.66 + +kubernetes: + version: 1.13.0 + storage: + enable: False + +haproxy: + stats: + enable: true + user: operations + password: your-stats-pwd + frontend: + - name: https_front + port: 443 + https: yes + backend: + - http_back1 + backend: + - name: http_back1 + server_groups: + - worker + #servers: + # Definition for the server that hosts the application.
+ #- name: "node1" + # address: "epiphany-lab1.domain.com" + port: 30001 + +monitoring: + alerts: + enable: False # required value + handlers: + mail: + enable: False # required value + smtp_from: "alert@test.com" + smtp_host: "smtp-url:smtp-port" + smtp_auth_username: "your-smtp-user@domain.com" + smtp_auth_password: "your-smtp-password" + smtp_require_tls: True + recipients: + - recipient1@domain.com + - recipient2@domain.com + slack: + enable: False # required value + api_url: url-to-slack-workspace-api.slack.com + pagerduty: + enable: False # required value + service_key: your-service-key + rules: + - name: "UpDown" + expression: up == 0 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "Node is down." + - name: "DiskSpace" + expression: ((node_filesystem_avail_bytes* 100) / node_filesystem_size_bytes) < 20 # 100 - 80 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "Disk usage is above 80%" + - name: "DiskSpacePrediction" + expression: predict_linear(node_filesystem_free_bytes{job="node"}[1h], 48 * 3600) < 0 + duration: 1m #1s, 1m, 1h, 1d, 1w, ... + severity: warning + message: "Disk will run out of space in less than 48h" + - name: "MemoryUsage" + expression: (sum by (instance) (node_memory_MemTotal_bytes) - sum by (instance)(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum by (instance)(node_memory_MemTotal_bytes) * 100 > 80 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... + severity: warning + message: "Server memory has been used in more than 80% during last 15 minutes." + - name: "CpuLoad" + expression: 100 - (avg by (instance) (irate(node_cpu_seconds_total{job="node",mode="idle"}[5m])) * 100) > 80 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... + severity: critical + message: "CPU utilization has exceeded 80% over last 15 minutes." + - name: "KafkaConsumerLag" + expression: sum by(consumergroup) (kafka_consumergroup_lag) > 1000 + duration: 15m #1s, 1m, 1h, 1d, 1w, ... 
+ severity: critical + message: "Kafka consumers are lagging more than 1000 messages over last 15 minutes." + \ No newline at end of file