diff --git a/blueprints/gcve/monitoring/README.md b/blueprints/gcve/monitoring/README.md new file mode 100644 index 0000000000..8111ca11ad --- /dev/null +++ b/blueprints/gcve/monitoring/README.md @@ -0,0 +1,117 @@ +# Google Cloud VMware Engine Logging and Monitoring Blueprint + +This blueprint simplifies the setup of monitoring and syslog logging for Google Cloud VMware Engine (GCVE) private clouds. + +## Overview + +Infrastructure monitoring and logging for GCVE are typically set up using a [standalone Bindplane agent](https://cloud.google.com/vmware-engine/docs/environment/howto-cloud-monitoring-standalone). This blueprint automates the deployment of the Bindplane agent using a Managed Instance Group. The agent collects metrics and syslog logs from VMware vCenter and forwards them to Cloud Monitoring and Cloud Logging. +

+ GCVE Logging and Monitoring Blueprint +

+ +## Deployed Resources + +This blueprint deploys and configures the following resources: + +* **Service Account:** Grants the Bindplane agent permissions to write logs/metrics and access Secret Manager. +* **Firewall Rule (optional):** Allows health checks on TCP port 5142 to ensure the agent is running. +* **Monitoring Dashboards (optional):** Provides default dashboards for GCVE metrics. +* **VM Template:** Creates a Debian 11-based template for the Bindplane agent. +* **Managed Instance Group:** Manages the deployment and provides autohealing to the Bindplane agent. +* **Secret Manager Secrets:** Stores vCenter credentials (username, password, FQDN). + +## Completing the Setup + +After deploying this blueprint, you need to complete the following steps: +* [Configure GCVE to send traffic to the Bindplane agent](https://cloud.google.com/vmware-engine/docs/environment/howto-forward-syslog), which listens on TCP port 5142 by default. +* Update secrets in Secret Manager with vCenter credentials and FQDN. + +## Troubleshooting + +If you encounter issues, check the following: + +* **Firewall:** Ensure that the firewall rule allows traffic to TCP port 5142. +* **vCenter Configuration:** Verify that GCVE is correctly configured to forward syslog messages. +* **Agent Logs:** Examine the Bindplane agent logs for errors. + +## Security Considerations + +* **Least Privilege:** Grant the Bindplane agent service account only the necessary permissions. +* **Secret Management:** Store vCenter credentials securely in Secret Manager. 
+ + +- [Overview](#overview) +- [Deployed Resources](#deployed-resources) +- [Completing the Setup](#completing-the-setup) +- [Troubleshooting](#troubleshooting) +- [Security Considerations](#security-considerations) +- [Basic Monitoring setup with default settings](#basic-monitoring-setup-with-default-settings) +- [Variables](#variables) +- [Outputs](#outputs) + + +## Basic Monitoring setup with default settings + +```hcl + +module "gcve-monitoring" { + source = "./fabric/blueprints/gcve/monitoring" + project_id = "gcve-mon-project" + project_create = { + billing_account = "0123AB-ABCDEF-123456" + parent = "folders/1234567890" + shared_vpc_host = "abcde-prod-net-spoke-0" + } + + vm_mon_config = { + vm_mon_name = "bp-agent" + vm_mon_type = "e2-small" + vm_mon_zone = "europe-west1-b" + } + + vpc_config = { + host_project_id = "abcde-prod-net-spoke-0" + vpc_self_link = "https://www.googleapis.com/compute/v1/projects/abcde-prod-net-spoke-0/global/networks/prod-spoke-0" + subnetwork_self_link = "projects/abcde-prod-net-spoke-0/regions/europe-west1/subnetworks/prod-default-ew1" + } + + vsphere_secrets = { + vsphere_server = "gcve-mon-vsphere-server" + vsphere_user = "gcve-mon-vsphere-user" + vsphere_password = "gcve-mon-vsphere-password" + } + + sa_gcve_monitoring = "gcve-mon-sa" + gcve_region = "europe-west1" + initial_delay_sec = 180 + create_dashboards = true + create_firewall_rule = true +} +# tftest modules=7 resources=22 +``` + +## Variables + +| name | description | type | required | default | +|---|---|:---:|:---:|:---:| +| [gcve_region](variables.tf#L29) | Region where the Private Cloud is deployed. | string | ✓ | | +| [project_id](variables.tf#L56) | Project id of existing or created project. | string | ✓ | | +| [vm_mon_config](variables.tf#L67) | GCE monitoring instance configuration. | object({…}) | ✓ | | +| [vpc_config](variables.tf#L77) | Shared VPC project and VPC details. 
| object({…}) | ✓ | | +| [create_dashboards](variables.tf#L17) | Specify sample GCVE monitoring dashboards should be installed. | bool | | true | +| [create_firewall_rule](variables.tf#L23) | Specify whether a firewall rule to allow Load Balancer Healthcheck should be implemented. | bool | | true | +| [initial_delay_sec](variables.tf#L34) | How long to delay checking for healthcheck upon initialization. | number | | 180 | +| [monitoring_image](variables.tf#L40) | Resource URI for OS image used to deploy monitoring agent. | string | | "projects/debian-cloud/global/images/family/debian-11" | +| [project_create](variables.tf#L46) | Project configuration for newly created project. Leave null to use existing project. Project creation forces VPC and cluster creation. | object({…}) | | null | +| [sa_gcve_monitoring](variables.tf#L61) | Service account for GCVE monitoring agent. | string | | "gcve-mon-sa" | +| [vsphere_secrets](variables.tf#L87) | Secret Manager secrets that contain vSphere credentials and FQDN. | object({…}) | | {} | + +## Outputs + +| name | description | sensitive | +|---|---|:---:| +| [gcve-mon-firewall](outputs.tf#L17) | Ingress rule to allow GCVE Syslog traffic. | | +| [gcve-mon-mig](outputs.tf#L22) | Managed Instance Group for GCVE Monitoring. | | +| [gcve-mon-sa](outputs.tf#L27) | Service Account for GCVE Monitoring. 
| | + diff --git a/blueprints/gcve/monitoring/dashboards/contention.json b/blueprints/gcve/monitoring/dashboards/contention.json new file mode 100644 index 0000000000..5baa07cf1c --- /dev/null +++ b/blueprints/gcve/monitoring/dashboards/contention.json @@ -0,0 +1,552 @@ +{ + "displayName": "GCVE Contention", + "mosaicLayout": { + "columns": 12, + "tiles": [ + { + "height": 3, + "widget": { + "title": "Available CPU in Mhz", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.cluster.cpu.available\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "yPos": 1 + }, + { + "height": 3, + "widget": { + "title": "Available Memory", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "STACKED_AREA", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.cluster.memory.available\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "xPos": 6, + "yPos": 1 + }, + { + "height": 1, + "widget": { + "text": { + "format": "RAW" + }, + "title": "Cluster" + }, + "width": 12 + }, + { + "height": 1, + "widget": { + "text": { + "format": "RAW" + }, + "title": "Hosts" + }, + "width": 12, + "yPos": 4 + }, + { + "height": 3, + "widget": { + "title": "CPU Used", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + 
"aggregation": { + "perSeriesAligner": "ALIGN_MAX" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.host_system.cpu.used\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "yPos": 5 + }, + { + "height": 3, + "widget": { + "title": "Memory Used", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "STACKED_AREA", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MAX" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.host_system.memory.used\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "xPos": 6, + "yPos": 5 + }, + { + "height": 1, + "widget": { + "text": { + "format": "RAW" + }, + "title": "Virtual Machines" + }, + "width": 12, + "yPos": 16 + }, + { + "height": 3, + "widget": { + "title": "Memory Usage Bytes", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "STACKED_AREA", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.memory.usage_bytes\" resource.type=\"generic_node\"", + "secondaryAggregation": {} + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "xPos": 6, + "yPos": 17 + }, + { + "height": 3, + "widget": { + "title": "CPU Usage in Mhz", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MAX" + }, + "filter": 
"metric.type=\"external.googleapis.com/vmware/vcenter.vm.cpu.usage\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "yPos": 17 + }, + { + "height": 3, + "widget": { + "title": "Disk Usage Bytes", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "STACKED_AREA", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MAX" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.disk.usage_bytes\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 12, + "yPos": 26 + }, + { + "height": 3, + "widget": { + "title": "Memory Utilization", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_PERCENTILE_95", + "groupByFields": [ + "resource.label.\"node_id\"" + ], + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.host_system.memory.utilization\" resource.type=\"generic_node\"", + "secondaryAggregation": {} + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 8, + "yPos": 11 + }, + { + "height": 3, + "widget": { + "scorecard": { + "gaugeView": { + "upperBound": 100 + }, + "timeSeriesQuery": { + "timeSeriesQueryLanguage": "fetch generic_node\n| metric\n 'external.googleapis.com/vmware/vcenter.host_system.memory.utilization'\n| group_by 1m, [value_utilization_max: max(value.utilization)]\n| every 1m\n| group_by [resource.node_id],\n [value_utilization_max_max: max(value_utilization_max)]\n| top 1" + } + }, + "title": "Peak Memory 
Utilization" + }, + "width": 4, + "xPos": 8, + "yPos": 11 + }, + { + "height": 3, + "widget": { + "title": "CPU Usage", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.host_system.cpu.usage\" resource.type=\"generic_node\"", + "secondaryAggregation": {} + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 8, + "yPos": 8 + }, + { + "height": 3, + "widget": { + "scorecard": { + "gaugeView": { + "upperBound": 100 + }, + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_MEAN", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.host_system.cpu.usage\" resource.type=\"generic_node\"" + } + } + }, + "title": "Peak CPU Usage" + }, + "width": 4, + "xPos": 8, + "yPos": 8 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_SUM", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.memory.usage_bytes\" resource.type=\"generic_node\"" + } + } + }, + "title": "VM Total Memory Used" + }, + "width": 3, + "xPos": 3, + "yPos": 20 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_MEAN", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vsphere.hosts.powered_on\" resource.type=\"generic_node\"" + } + } + }, + "title": "Powered On Hosts" + }, + "width": 2, + "xPos": 8, + "yPos": 14 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": 
{ + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_MEAN", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vsphere.hosts.powered_off\" resource.type=\"generic_node\"" + } + } + }, + "title": "Powered Off Hosts" + }, + "width": 2, + "xPos": 10, + "yPos": 14 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_MEAN", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.cluster.memory.available\" resource.type=\"generic_node\"" + } + } + }, + "title": "Memory Available In Cluster" + }, + "width": 4, + "yPos": 14 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_SUM", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.host_system.memory.used\" resource.type=\"generic_node\"" + } + } + }, + "title": "Memory Used By Hosts" + }, + "width": 4, + "xPos": 4, + "yPos": 14 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_MEAN", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.cluster.memory.available\" resource.type=\"generic_node\"" + } + } + }, + "title": "Memory Available in Cluster" + }, + "width": 3, + "yPos": 20 + }, + { + "height": 2, + "widget": { + "scorecard": { + "gaugeView": { + "upperBound": 100 + }, + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_MEAN", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.cpu.used_percent\" resource.type=\"generic_node\"" + } + } + }, + "title": "Used CPU Percentage" + }, + 
"width": 6, + "xPos": 6, + "yPos": 20 + }, + { + "height": 4, + "widget": { + "title": "CPU Ready", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.cpu.ready\" resource.type=\"generic_node\"", + "secondaryAggregation": {} + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 12, + "yPos": 22 + } + ] + } + } diff --git a/blueprints/gcve/monitoring/dashboards/overview.json b/blueprints/gcve/monitoring/dashboards/overview.json new file mode 100644 index 0000000000..6c8439933a --- /dev/null +++ b/blueprints/gcve/monitoring/dashboards/overview.json @@ -0,0 +1,499 @@ +{ + "displayName": "GCVE Overview", + "mosaicLayout": { + "columns": 12, + "tiles": [ + { + "height": 3, + "widget": { + "title": "Memory Utilization", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "STACKED_BAR", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MAX" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.host_system.memory.utilization\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 4, + "xPos": 4, + "yPos": 9 + }, + { + "height": 1, + "widget": { + "text": { + "format": "RAW" + }, + "title": "DATACENTER" + }, + "width": 12 + }, + { + "height": 3, + "widget": { + "title": "Average Host CPU Utilization", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "STACKED_AREA", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": 
"ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.datacenter.cpu.average_host_utilization\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 12, + "yPos": 5 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_SUM", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.disk.usage_bytes\" resource.type=\"generic_node\"" + } + } + }, + "title": "Total Storage Used" + }, + "width": 3, + "yPos": 3 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_SUM", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.memory.usage_bytes\" resource.type=\"generic_node\"" + } + } + }, + "title": "Total Memory Used " + }, + "width": 3, + "xPos": 3, + "yPos": 3 + }, + { + "height": 2, + "widget": { + "scorecard": { + "thresholds": [ + { + "color": "RED", + "direction": "ABOVE" + } + ], + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_MAX", + "perSeriesAligner": "ALIGN_MAX" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.datacenter.hosts\" resource.type=\"generic_node\" metric.label.\"color\"=\"red\"" + } + } + }, + "title": "Red Hosts" + }, + "width": 3, + "xPos": 6, + "yPos": 3 + }, + { + "height": 3, + "widget": { + "title": "CPU Usage %", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "STACKED_AREA", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.host_system.cpu.usage\" 
resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 4, + "yPos": 9 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_SUM", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.datacenter.virtual_machines\" resource.type=\"generic_node\"" + } + } + }, + "title": "Running VMs" + }, + "width": 3, + "xPos": 9, + "yPos": 3 + }, + { + "height": 3, + "widget": { + "title": "CPU Utilization Ratio", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.host_system.cpu.utilization_ratio\" resource.type=\"generic_node\"", + "secondaryAggregation": {} + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 4, + "xPos": 8, + "yPos": 9 + }, + { + "height": 1, + "widget": { + "text": { + "format": "RAW" + }, + "title": "HOSTS" + }, + "width": 12, + "yPos": 8 + }, + { + "height": 1, + "widget": { + "text": { + "format": "RAW" + }, + "title": "VMs" + }, + "width": 12, + "yPos": 12 + }, + { + "height": 3, + "widget": { + "title": "CPU Used %", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.cpu.used_percent\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 
4, + "xPos": 8, + "yPos": 13 + }, + { + "height": 3, + "widget": { + "title": "Memory Used %", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.memory.used_percent\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 4, + "xPos": 4, + "yPos": 13 + }, + { + "height": 3, + "widget": { + "title": "Disk Used Bytes", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "STACKED_BAR", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.disk.usage_bytes\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 4, + "yPos": 13 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_MEAN", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vsphere.clusters.total\" resource.type=\"generic_node\"" + } + } + }, + "title": "Clusters Total" + }, + "width": 3, + "yPos": 1 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_MEAN", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vsphere.datastores.total\" resource.type=\"generic_node\"" + } + } + }, + "title": "Datastores Total" + }, + "width": 3, + "xPos": 3, + "yPos": 1 + }, + { + "height": 2, + "widget": { + 
"scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_MAX", + "perSeriesAligner": "ALIGN_MAX" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vsphere.hosts\" resource.type=\"generic_node\"" + } + } + }, + "title": "Hosts Total" + }, + "width": 3, + "xPos": 6, + "yPos": 1 + }, + { + "height": 2, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "crossSeriesReducer": "REDUCE_MAX", + "perSeriesAligner": "ALIGN_MAX" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vsphere.virtual_machines.total\" resource.type=\"generic_node\"" + } + } + }, + "title": "VMs Total" + }, + "width": 3, + "xPos": 9, + "yPos": 1 + }, + { + "height": 4, + "widget": { + "title": "Memory Used Bytes", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.memory.usage_bytes\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "yPos": 16 + }, + { + "height": 4, + "widget": { + "title": "Network Throughput Bytes", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "STACKED_BAR", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.network.throughput_bytes\" resource.type=\"generic_node\"", + "secondaryAggregation": {} + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "xPos": 6, + "yPos": 16 + } + ] + } + } diff --git 
a/blueprints/gcve/monitoring/dashboards/virtual-machine-performance.json b/blueprints/gcve/monitoring/dashboards/virtual-machine-performance.json new file mode 100644 index 0000000000..57ff7b3aff --- /dev/null +++ b/blueprints/gcve/monitoring/dashboards/virtual-machine-performance.json @@ -0,0 +1,240 @@ +{ + "displayName": "GCVE Virtual Machine Performance", + "mosaicLayout": { + "columns": 12, + "tiles": [ + { + "height": 4, + "widget": { + "title": "CPU Usage", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.cpu.usage\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6 + }, + { + "height": 4, + "widget": { + "title": "Disk Space Used", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "STACKED_BAR", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.disk.usage_bytes\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "xPos": 6, + "yPos": 8 + }, + { + "height": 4, + "widget": { + "title": "Memory Utilization", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": 
"metric.type=\"external.googleapis.com/vmware/vcenter.vm.memory.used_percent\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "yPos": 4 + }, + { + "height": 4, + "widget": { + "title": "Network Throughput Bytes", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.network.throughput_bytes\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "xPos": 6, + "yPos": 4 + }, + { + "height": 4, + "widget": { + "title": "CPU Utilization", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.cpu.used_percent\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "xPos": 6 + }, + { + "height": 4, + "widget": { + "title": "Disk Space Free", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "STACKED_BAR", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.disk.free_bytes\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": 
"y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "yPos": 8 + }, + { + "height": 4, + "widget": { + "title": "Disk Utilization", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "minAlignmentPeriod": "60s", + "plotType": "LINE", + "timeSeriesQuery": { + "timeSeriesFilter": { + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_MEAN" + }, + "filter": "metric.type=\"external.googleapis.com/vmware/vcenter.vm.disk.used_percent\" resource.type=\"generic_node\"" + } + } + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + } + } + }, + "width": 6, + "yPos": 12 + } + ] + } + } diff --git a/blueprints/gcve/monitoring/gcve-mon-diagram.png b/blueprints/gcve/monitoring/gcve-mon-diagram.png new file mode 100644 index 0000000000..a7bbd19450 Binary files /dev/null and b/blueprints/gcve/monitoring/gcve-mon-diagram.png differ diff --git a/blueprints/gcve/monitoring/main.tf b/blueprints/gcve/monitoring/main.tf new file mode 100644 index 0000000000..4c5e6789a4 --- /dev/null +++ b/blueprints/gcve/monitoring/main.tf @@ -0,0 +1,168 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+locals {
+  # Base URL of the bucket path from which the agent archive and installer are fetched.
+  base_gcve_agent_endpoint = "https://storage.googleapis.com/gcve-observability-agent/latest/vmware-linux-amd64"
+  # Command prefix; the secret name is appended when rendering the startup script.
+  base_gcloud_secret_manager = "gcloud secrets versions access latest --secret="
+  # Project-level roles granted to the monitoring agent service account.
+  sa_gcve_monitoring_roles = toset([
+    "roles/secretmanager.secretAccessor",
+    "roles/monitoring.admin",
+    "roles/logging.logWriter",
+  ])
+  # TCP port probed by the MIG health check and opened by the optional firewall rule.
+  syslog_port = 5142
+  use_shared_vpc = (
+    try(var.project_create.shared_vpc_host, null) != null
+  )
+  # The network name is the last path segment of the VPC self link.
+  vpc_name = reverse(split("/", var.vpc_config.vpc_self_link))[0]
+}
+
+# Creates the project when var.project_create is set, otherwise references the
+# existing one; enables the required APIs in both cases.
+module "project" {
+  source          = "../../../modules/project"
+  parent          = try(var.project_create.parent, null)
+  billing_account = try(var.project_create.billing_account, null)
+  name            = var.project_id
+  project_create  = var.project_create != null
+  services = [
+    "compute.googleapis.com",
+    "monitoring.googleapis.com",
+    "logging.googleapis.com",
+    "secretmanager.googleapis.com"
+  ]
+  shared_vpc_service_config = !local.use_shared_vpc ? null : {
+    host_project       = var.project_create.shared_vpc_host
+    service_iam_grants = module.project.services
+  }
+}
+
+# Service account used by the agent VM. The project id is read from the project
+# module (not the raw variable) so that this module is ordered after project
+# creation and API enablement.
+module "sa_gcve_monitoring" {
+  source     = "../../../modules/iam-service-account"
+  project_id = module.project.project_id
+  name       = var.sa_gcve_monitoring
+  iam_project_roles = {
+    (module.project.project_id) = tolist(local.sa_gcve_monitoring_roles)
+  }
+}
+
+# Instance template for the agent VM; the startup script is rendered from
+# scripts/installer.sh with the agent download URLs and secret lookup commands.
+module "gcve-mon-template" {
+  source          = "../../../modules/compute-vm"
+  project_id      = module.project.project_id
+  name            = "gcve-mon-template"
+  zone            = var.vm_mon_config.vm_mon_zone
+  instance_type   = var.vm_mon_config.vm_mon_type
+  create_template = true
+  can_ip_forward  = false
+  network_interfaces = [
+    {
+      network    = var.vpc_config.vpc_self_link
+      subnetwork = var.vpc_config.subnetwork_self_link
+      nat        = false
+      addresses  = null
+    }
+  ]
+  boot_disk = {
+    initialize_params = {
+      image = var.monitoring_image
+      size  = 100
+      type  = "pd-balanced"
+    }
+  }
+  options = {
+    allow_stopping_for_update = true
+    deletion_protection       = false
+    spot                      = false
+    termination_action        = "STOP"
+  }
+
+  metadata = {
+    startup-script = templatefile("${path.module}/scripts/installer.sh",
+      {
+        endpoint_agent                 = "${local.base_gcve_agent_endpoint}/artifacts/bpagent-headless-vmware.tar.gz"
+        endpoint_install               = "${local.base_gcve_agent_endpoint}/installer/install.sh"
+        gcloud_secret_vsphere_server   = "${local.base_gcloud_secret_manager}${var.vsphere_secrets.vsphere_server}"
+        gcloud_secret_vsphere_user     = "${local.base_gcloud_secret_manager}${var.vsphere_secrets.vsphere_user}"
+        gcloud_secret_vsphere_password = "${local.base_gcloud_secret_manager}${var.vsphere_secrets.vsphere_password}"
+        gcve_region                    = var.gcve_region
+        project_id                     = var.project_id
+    })
+  }
+
+  service_account = {
+    email  = module.sa_gcve_monitoring.email
+    scopes = ["https://www.googleapis.com/auth/cloud-platform"]
+  }
+}
+
+# Single-instance managed instance group with autohealing driven by a TCP
+# health check on the syslog port.
+module "gcve-mon-mig" {
+  source            = "../../../modules/compute-mig"
+  project_id        = module.project.project_id
+  location          = var.gcve_region
+  name              = "${var.vm_mon_config.vm_mon_name}-mig"
+  instance_template = module.gcve-mon-template.template.self_link
+  target_size       = 1
+  auto_healing_policies = {
+    initial_delay_sec = var.initial_delay_sec
+  }
+  health_check_config = {
+    enable_logging = true
+    tcp = {
+      port = local.syslog_port
+    }
+  }
+}
+
+# Secrets named after var.vsphere_secrets, located in the GCVE region. Only the
+# secrets are declared here; no secret versions are set in this file.
+module "secret-manager" {
+  source     = "../../../modules/secret-manager"
+  project_id = module.project.project_id
+  secrets = {
+    (var.vsphere_secrets.vsphere_server)   = { locations = [var.gcve_region] },
+    (var.vsphere_secrets.vsphere_user)     = { locations = [var.gcve_region] },
+    (var.vsphere_secrets.vsphere_password) = { locations = [var.gcve_region] }
+  }
+}
+
+# Optional ingress rule in the VPC host project that lets the listed health
+# check source ranges reach the agent service account on the syslog port.
+module "firewall" {
+  source = "../../../modules/net-vpc-firewall"
+  count  = var.create_firewall_rule ? 1 : 0
+
+  project_id = var.vpc_config.host_project_id
+  network    = local.vpc_name
+  default_rules_config = {
+    disabled = true
+  }
+
+  ingress_rules = {
+    allow-healthcheck = {
+      description          = "Allow healthcheck for Syslog port."
+      source_ranges        = ["35.191.0.0/16", "130.211.0.0/22"]
+      targets              = [module.sa_gcve_monitoring.email]
+      use_service_accounts = true
+      rules = [{
+        protocol = "tcp"
+        ports    = [local.syslog_port]
+      }]
+    }
+  }
+}
+
+# One dashboard per JSON file in ./dashboards, created only when enabled.
+# toset([]) keeps both branches of the conditional the same type.
+resource "google_monitoring_dashboard" "gcve_mon_dashboards" {
+  for_each       = var.create_dashboards ? fileset("${path.module}/dashboards", "*.json") : toset([])
+  dashboard_json = file("${path.module}/dashboards/${each.value}")
+  project        = module.project.project_id
+}
diff --git a/blueprints/gcve/monitoring/outputs.tf b/blueprints/gcve/monitoring/outputs.tf
new file mode 100644
index 0000000000..4dc233865e
--- /dev/null
+++ b/blueprints/gcve/monitoring/outputs.tf
@@ -0,0 +1,30 @@
+/**
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+output "gcve-mon-firewall" {
+  description = "Ingress rule to allow GCVE Syslog traffic."
+  value       = module.firewall
+}
+
+output "gcve-mon-mig" {
+  description = "Managed Instance Group for GCVE Monitoring."
+  value       = module.gcve-mon-mig
+}
+
+output "gcve-mon-sa" {
+  description = "Service Account for GCVE Monitoring."
+  value       = module.sa_gcve_monitoring
+}
diff --git a/blueprints/gcve/monitoring/scripts/installer.sh b/blueprints/gcve/monitoring/scripts/installer.sh
new file mode 100644
index 0000000000..8db63606c4
--- /dev/null
+++ b/blueprints/gcve/monitoring/scripts/installer.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# BindPlane Agent with Terraform
+# https://github.com/GoogleCloudPlatform/monitoring-dashboard-samples/tree/master/terraform/agents/bindplane
+
+# NOTE: this file is rendered by Terraform templatefile(); dollar-brace
+# placeholders are substituted before the script runs, so shell variables
+# below are deliberately written as $name (no braces).
+
+VCENTER_CONFIG=/opt/bpagent/config/metrics/sources/vmware_vcenter.yaml
+LOG_AGENT_CONFIG=/opt/bpagent/config/log_agent.yaml
+
+# Escape the characters that are special in a sed replacement (backslash,
+# the "/" delimiter and "&") so secret values are written out literally.
+escape_sed_replacement() {
+  printf '%s' "$1" | sed -e 's/[\/&]/\\&/g'
+}
+
+# Install prerequisites
+sudo apt-get update
+sudo apt-get install -y rsync
+
+# Setting up Cloud Monitoring with a standalone agent
+# https://cloud.google.com/vmware-engine/docs/environment/howto-cloud-monitoring-standalone
+# -f makes curl fail on HTTP errors instead of saving the error page as the download.
+curl -fsS ${endpoint_agent} -o /tmp/agent.tar.gz || exit 1
+curl -fsS ${endpoint_install} -o /tmp/install.sh || exit 1
+sudo chmod +x /tmp/install.sh
+sudo /tmp/install.sh /tmp/agent.tar.gz
+
+# Configure the agent to access your private cloud for metrics
+sudo cp /opt/bpagent/config/metrics/examples/vmware_vcenter.yaml /opt/bpagent/config/metrics/sources
+gcloud config set project ${project_id}
+vsphere_server="$(${gcloud_secret_vsphere_server})"
+vsphere_user="$(${gcloud_secret_vsphere_user})"
+vsphere_password="$(${gcloud_secret_vsphere_password})"
+sudo sed -i "s/host:.*$/host: $(escape_sed_replacement "$vsphere_server")/g" "$VCENTER_CONFIG"
+sudo sed -i "s/username:.*$/username: $(escape_sed_replacement "$vsphere_user")/g" "$VCENTER_CONFIG"
+sudo sed -i "s/password:.*$/password: $(escape_sed_replacement "$vsphere_password")/g" "$VCENTER_CONFIG"
+sudo sed -i "s/# region:.*$/region: ${gcve_region}/g" "$VCENTER_CONFIG"
+
+#Configure the agent to access the service account for reporting
+sudo cp /opt/bpagent/config/log_agent.example.yaml "$LOG_AGENT_CONFIG"
+sudo sed -i "s/project_id:.*$/project_id: ${project_id}/g" "$LOG_AGENT_CONFIG"
+sudo sed -i "s/credentials_file:.*$/#credentials_file: /g" "$LOG_AGENT_CONFIG"
+
+sudo systemctl restart bpagent
+sudo systemctl enable bpagent
diff --git a/blueprints/gcve/monitoring/variables.tf b/blueprints/gcve/monitoring/variables.tf
new file mode 100644
index 0000000000..6104da0578
--- /dev/null
+++ b/blueprints/gcve/monitoring/variables.tf
@@ -0,0 +1,96 @@
+/**
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+variable "create_dashboards" {
+  description = "Specify sample GCVE monitoring dashboards should be installed."
+  type        = bool
+  default     = true
+}
+
+variable "create_firewall_rule" {
+  description = "Specify whether a firewall rule to allow Load Balancer Healthcheck should be implemented."
+  type        = bool
+  default     = true
+}
+
+variable "gcve_region" {
+  description = "Region where the Private Cloud is deployed."
+  type        = string
+}
+
+variable "initial_delay_sec" {
+  description = "How long to delay checking for healthcheck upon initialization."
+  type        = number
+  default     = 180
+}
+
+variable "monitoring_image" {
+  description = "Resource URI for OS image used to deploy monitoring agent."
+  type        = string
+  default     = "projects/debian-cloud/global/images/family/debian-11"
+}
+
+variable "project_create" {
+  description = "Project configuration for newly created project. Leave null to use existing project. Project creation forces VPC and cluster creation."
+  type = object({
+    billing_account = string
+    parent          = optional(string)
+    shared_vpc_host = optional(string)
+  })
+  default = null
+}
+
+variable "project_id" {
+  description = "Project id of existing or created project."
+  type        = string
+}
+
+variable "sa_gcve_monitoring" {
+  description = "Service account for GCVE monitoring agent."
+  type        = string
+  default     = "gcve-mon-sa"
+}
+
+variable "vm_mon_config" {
+  description = "GCE monitoring instance configuration."
+  type = object({
+    vm_mon_name = optional(string, "bp-agent")
+    vm_mon_type = optional(string, "e2-small")
+    vm_mon_zone = string
+  })
+  nullable = false
+}
+
+variable "vpc_config" {
+  description = "Shared VPC project and VPC details."
+  type = object({
+    host_project_id      = string
+    vpc_self_link        = string
+    subnetwork_self_link = string
+  })
+  nullable = false
+}
+
+variable "vsphere_secrets" {
+  description = "Secret Manager secrets that contain vSphere credentials and FQDN."
+  type = object({
+    vsphere_password = optional(string, "gcve-mon-vsphere-password")
+    vsphere_server   = optional(string, "gcve-mon-vsphere-server")
+    vsphere_user     = optional(string, "gcve-mon-vsphere-user")
+  })
+  nullable = false
+  default  = {}
+}