diff --git a/.ci/dashboards/prometheus.json b/.ci/dashboards/prometheus.json new file mode 100644 index 0000000..283821b --- /dev/null +++ b/.ci/dashboards/prometheus.json @@ -0,0 +1,3560 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "enable": true, + "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds{instance=~\"$instance\"}[10m])) by (instance)", + "hide": false, + "iconColor": "rgb(0, 96, 19)", + "limit": 100, + "name": "reloads", + "showIn": 0, + "step": "5m", + "type": "alert" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "enable": true, + "expr": "count(sum(up{instance=~\"$instance\"}) by (instance) < 1)", + "hide": false, + "iconColor": "rgba(255, 96, 96, 1)", + "limit": 100, + "name": "down", + "showIn": 0, + "step": "5m", + "type": "alert" + } + ] + }, + "description": "Get started faster with Grafana Cloud then easily build these dashboards. https://grafana.com/products/cloud/\nOverview of metrics from Prometheus 2.0. 
\nUseful for using Prometheus to monitor your Prometheus.\nRevisions welcome!", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 4, + "links": [ + { + "asDropdown": false, + "icon": "doc", + "includeVars": true, + "keepTime": true, + "tags": [], + "targetBlank": true, + "title": "Report", + "tooltip": "Create a PDF report", + "type": "link", + "url": "/api/plugins/mahendrapaipuri-dashboardreporter-app/resources/report?dashUid=b3228ada-fd89-4aed-8605-d5f7b95aa237" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 34, + "panels": [], + "title": "at a glance", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Percentage of uptime during the most recent $interval period. Change the period with the 'interval' dropdown above.", + "fieldConfig": { + "defaults": { + "decimals": 3, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 90 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 99 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 2, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "avg(avg_over_time(up{instance=~\"$instance\",job=~\"$job\"}[$interval]) * 100)", + "format": 
"time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 40 + } + ], + "title": "Uptime [$interval]", + "type": "stat" + }, + { + "columns": [], + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Servers which are DOWN RIGHT NOW! \nFIX THEM!!", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 1 + }, + "hideTimeOverride": true, + "id": 25, + "options": {}, + "pluginVersion": "11.4.0", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "", + "align": "auto", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/__name__|job|Value/", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": " ", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(255, 0, 0, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(255, 0, 0, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "pattern": "instance", + "thresholds": [ + "", + "", + "" + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "up{instance=~\"$instance\",job=~\"$job\"} < 1", + "format": "table", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "timeFrom": "1s", + "title": "Currently Down", + "transform": "table", + "type": "table-old" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Total number of time series in prometheus", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": 
"special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1000000 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 2000000 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 12, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "B", + "step": 40 + } + ], + "title": "Total Series", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 14, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + 
"pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "B", + "step": 40 + } + ], + "title": "Memory Chunks", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 35, + "panels": [], + "title": "quick numbers", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "The total number of rule group evaluations missed due to slow rule group evaluation.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 9 + }, + "id": 16, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(sum_over_time(prometheus_evaluator_iterations_missed_total{job=~\"$job\",instance=~\"$instance\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 40 + } + ], + "title": "Missed Iterations [$interval]", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + 
}, + "description": "The total number of rule group evaluations skipped due to throttled metric storage.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 9 + }, + "id": 18, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{job=~\"$job\",instance=~\"$instance\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 40 + } + ], + "title": "Skipped Iterations [$interval]", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Total number of scrapes that hit the sample limit and were rejected.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + 
"h": 7, + "w": 4, + "x": 8, + "y": 9 + }, + "id": 19, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(sum_over_time(prometheus_target_scrapes_exceeded_sample_limit_total{job=~\"$job\",instance=~\"$instance\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 40 + } + ], + "title": "Tardy Scrapes [$interval]", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Number of times the database failed to reload block data from disk.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 9 + }, + "id": 13, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": 
"sum(sum_over_time(prometheus_tsdb_reloads_failures_total{job=~\"$job\",instance=~\"$instance\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 40 + } + ], + "title": "Reload Failures [$interval]", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Sum of all skipped scrapes", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 9 + }, + "id": 20, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(sum_over_time(prometheus_target_scrapes_exceeded_sample_limit_total{job=~\"$job\",instance=~\"$instance\"}[$interval])) + \nsum(sum_over_time(prometheus_target_scrapes_sample_duplicate_timestamp_total{job=~\"$job\",instance=~\"$instance\"}[$interval])) + \nsum(sum_over_time(prometheus_target_scrapes_sample_out_of_bounds_total{job=~\"$job\",instance=~\"$instance\"}[$interval])) + \nsum(sum_over_time(prometheus_target_scrapes_sample_out_of_order_total{job=~\"$job\",instance=~\"$instance\"}[$interval])) ", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 40 + } + ], + "title": "Skipped 
Scrapes [$interval]", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 36, + "panels": [], + "title": "errors", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "All non-zero failures and errors", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Errors", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 33, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(net_conntrack_dialer_conn_failed_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Failed Connections", + "refId": "A", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": 
"sum(increase(prometheus_evaluator_iterations_missed_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Missed Iterations", + "refId": "B", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Skipped Iterations", + "refId": "C", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Evaluation", + "refId": "D", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_sd_azure_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Azure Refresh", + "refId": "E", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Consul RPC", + "refId": "F", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_sd_dns_lookup_failures_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "DNS Lookup", + "refId": "G", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_sd_ec2_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "EC2 Refresh", + "refId": "H", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "${ds}" + }, + "expr": "sum(increase(prometheus_sd_gce_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "GCE Refresh", + "refId": "I", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Marathon Refresh", + "refId": "J", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Openstack Refresh", + "refId": "K", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_sd_triton_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Triton Refresh", + "refId": "L", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Sample Limit", + "refId": "M", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Duplicate Timestamp", + "refId": "N", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Timestamp 
Out of Bounds", + "refId": "O", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Sample Out of Order", + "refId": "P", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_treecache_zookeeper_failures_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Zookeeper", + "refId": "Q", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "TSDB Compactions", + "refId": "R", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Series Not Found", + "refId": "S", + "step": 2 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=~\"$instance\"}[5m])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Reload", + "refId": "T", + "step": 2 + } + ], + "title": "Failures and Errors", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 37, + "panels": [], + "title": "up", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Up", + 
"axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "up{instance=~\"$instance\",job=~\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 2 + } + ], + "title": "Upness (stacked)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Chunks", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 4 + } + ], + "title": "Storage Memory Chunks", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 38, + "panels": [], + "title": "series", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Series", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 4 + } + ], + "title": "Series Count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Series Count", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "removed" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 33 + }, + 
"id": 32, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum( increase(prometheus_tsdb_head_series_created_total{instance=~\"$instance\"}[5m]) )", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "created", + "refId": "A", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum( increase(prometheus_tsdb_head_series_removed_total{instance=~\"$instance\"}[5m]) )", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "removed", + "refId": "B", + "step": 4 + } + ], + "title": "Series Created / Removed", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 39, + "panels": [], + "title": "appended samples", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Rate of total number of appended samples", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Samples / Second", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "10.58.3.10:80" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BA43A9", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[1m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 2 + } + ], + "title": "Appended Samples per Second", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 48 + }, + "id": 40, + "panels": [], + "title": "sync", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Total number of syncs that were executed on a scrape pool.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Syncs", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + 
"min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(prometheus_target_scrape_pool_sync_total{job=~\"$job\",instance=~\"$instance\"}) by (scrape_job)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{scrape_job}}", + "refId": "B", + "step": 4 + } + ], + "title": "Scrape Sync Total", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Actual interval to sync the scrape pool.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Milliseconds", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + 
"y": 49 + }, + "id": 21, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[2m])) by (scrape_job) * 1000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{scrape_job}}", + "refId": "A", + "step": 4 + } + ], + "title": "Target Sync", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 56 + }, + "id": 41, + "panels": [], + "title": "scrapes", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 57 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": 
"multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "scrape_duration_seconds{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 4 + } + ], + "title": "Scrape Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Total number of rejected scrapes", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Scrapes", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 57 + }, + "id": 30, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(prometheus_target_scrapes_exceeded_sample_limit_total{job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + 
"legendFormat": "exceeded sample limit", + "refId": "A", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(prometheus_target_scrapes_sample_duplicate_timestamp_total{job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "duplicate timestamp", + "refId": "B", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(prometheus_target_scrapes_sample_out_of_bounds_total{job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "out of bounds", + "refId": "C", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(prometheus_target_scrapes_sample_out_of_order_total{job=~\"$job\",instance=~\"$instance\"}) ", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "out of order", + "refId": "D", + "step": 4 + } + ], + "title": "Rejected Scrapes", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 64 + }, + "id": 42, + "panels": [], + "title": "durations", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "The duration of rule group evaluations", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Milliseconds", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 65 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "1000 * rate(prometheus_evaluator_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\"}[5m]) / rate(prometheus_evaluator_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "E", + "step": 4 + } + ], + "title": "Average Rule Evaluation Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Microseconds", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + 
"color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 65 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(rate(http_request_duration_microseconds_count{job=~\"$job\",instance=~\"$instance\"}[1m])) by (handler) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{handler}}", + "refId": "A", + "step": 4 + } + ], + "title": "HTTP Request Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 72 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + 
"sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(prometheus_engine_query_duration_seconds_sum{job=~\"$job\",instance=~\"$instance\"}) by (slice)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{slice}}", + "refId": "A", + "step": 4 + } + ], + "title": "Prometheus Engine Query Duration Seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Rule-group evaluations \n - total\n - missed due to slow rule group evaluation\n - skipped due to throttled metric storage", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "iterations", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 72 + }, + "id": 31, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": 
"sum(rate(prometheus_evaluator_iterations_total{job=~\"$job\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Total", + "refId": "B", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(rate(prometheus_evaluator_iterations_missed_total{job=~\"$job\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Missed", + "refId": "A", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(rate(prometheus_evaluator_iterations_skipped_total{job=~\"$job\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Skipped", + "refId": "C", + "step": 4 + } + ], + "title": "Rule Evaluator Iterations", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 79 + }, + "id": 43, + "panels": [], + "title": "notifications", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Notifications", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + 
"unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 80 + }, + "id": 22, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "rate(prometheus_notifications_sent_total[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 2 + } + ], + "title": "Notifications Sent", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 87 + }, + "id": 44, + "panels": [], + "title": "config", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Minutes", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 88 + }, + "id": 23, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true 
+ }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "(time() - prometheus_config_last_reload_success_timestamp_seconds{job=~\"$job\",instance=~\"$instance\"}) / 60", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 4 + } + ], + "title": "Minutes Since Successful Config Reload", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Success", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 88 + }, + "id": 24, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "prometheus_config_last_reload_successful{job=~\"$job\",instance=~\"$instance\"}", + 
"format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 4 + } + ], + "title": "Successful Config Reload", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 95 + }, + "id": 45, + "panels": [], + "title": "garbage collection", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "GC invocation durations", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 96 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(rate(go_gc_duration_seconds_sum{instance=~\"$instance\",job=~\"$job\"}[2m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 2 + } + ], + "title": "GC Rate / 
2m", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 103 + }, + "id": 46, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "This is probably wrong! Please help.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 104 + }, + "id": 26, + "options": {}, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_alloc_bytes_total{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "alloc_bytes_total", + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_alloc_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "allocated", + "refId": "B", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_buck_hash_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "profiling bucket hash table", + "refId": "C", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_gc_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "GC metadata", + "refId": "D", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_heap_alloc_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "heap in-use", + "refId": "E", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_heap_idle_bytes{job=~\"$job\", instance=~\"$instance\"})", 
+ "format": "time_series", + "intervalFactor": 2, + "legendFormat": "heap idle", + "refId": "F", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "heap in use", + "refId": "G", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_heap_released_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "heap released", + "refId": "H", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_heap_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "heap system", + "refId": "I", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_mcache_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "mcache in use", + "refId": "J", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_mcache_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "mcache sys", + "refId": "K", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_mspan_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "mspan in use", + "refId": "L", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_mspan_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "mspan sys", + 
"refId": "M", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_next_gc_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "heap next gc", + "refId": "N", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_other_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "other sys", + "refId": "O", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_stack_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "stack in use", + "refId": "P", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_stack_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "stack sys", + "refId": "Q", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "sys", + "refId": "R", + "step": 10 + } + ], + "title": "Go Memory Usage (FIXME)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 104 + }, + "id": 9, + "options": {}, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "prometheus_target_interval_length_seconds{instance=~\"$instance\", job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{quantile}} {{interval}}", + "refId": "A", + "step": 20 + } + ], + "title": "Scrape 
Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 104 + }, + "id": 7, + "options": {}, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "expr": "sum(rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m])) by (interval)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{interval}}", + "refId": "A", + "step": 20 + } + ], + "title": "Target Scrapes / 5m", + "type": "timeseries" + } + ], + "title": "Broken, ignore", + "type": "row" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "definition": "query_result(prometheus_tsdb_head_samples_appended_total)", + "includeAll": true, + "multi": true, + "name": "job", + "options": [], + "query": { + "qryType": 3, + "query": "query_result(prometheus_tsdb_head_samples_appended_total)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "/.*job=\"([^\"]+)/", + "sort": 1, + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "definition": "query_result(up{job=~\"$job\"})", + "includeAll": true, + "multi": true, + "name": "instance", + "options": [], + "query": { + "qryType": 3, + "query": "query_result(up{job=~\"$job\"})", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "/.*instance=\"([^\"]+).*/", + "type": "query" + }, + { + "current": { + "text": "1d", + "value": "1d" + }, + "includeAll": false, + "name": "interval", + "options": [ + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + 
"selected": false, + "text": "3h", + "value": "3h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": true, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "2d", + "value": "2d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + }, + { + "selected": false, + "text": "90d", + "value": "90d" + }, + { + "selected": false, + "text": "180d", + "value": "180d" + } + ], + "query": "1h, 3h, 6h, 12h, 1d, 2d, 7d, 30d, 90d, 180d", + "type": "custom" + }, + { + "current": { + "text": "Prometheus", + "value": "PBFA97CFB590B2093" + }, + "hide": 2, + "name": "ds", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-60d", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Prometheus 2.0 Overview", + "uid": "b3228ada-fd89-4aed-8605-d5f7b95aa237", + "version": 2, + "weekStart": "" +} diff --git a/.ci/datasources/sample.yml b/.ci/datasources/sample.yml new file mode 100644 index 0000000..6e169e4 --- /dev/null +++ b/.ci/datasources/sample.yml @@ -0,0 +1,15 @@ +apiVersion: 1 + +# A sample Prometheus datasource for testing purposes. 
+datasources: + - access: proxy + isDefault: true + jsonData: + cacheLevel: Medium + incrementalQuerying: true + prometheusType: Prometheus + prometheusVersion: 2.53.1 + timeInterval: 10s + name: Prometheus + type: prometheus + url: https://prometheus.demo.do.prometheus.io diff --git a/.ci/docker-compose.yaml b/.ci/docker-compose.yaml index c1ccfa4..a004a31 100644 --- a/.ci/docker-compose.yaml +++ b/.ci/docker-compose.yaml @@ -8,12 +8,13 @@ services: context: ../.config args: grafana_image: ${GRAFANA_IMAGE:-grafana-oss} - grafana_version: ${GRAFANA_VERSION:-11.3.0} + grafana_version: ${GRAFANA_VERSION:-11.4.0} ports: - 3080:${GF_SERVER_HTTP_PORT:-3000}/tcp volumes: - ../dist:/var/lib/grafana/plugins/mahendrapaipuri-dashboardreporter-app - ./dashboards:/etc/grafana/provisioning/dashboards + - ./datasources:/etc/grafana/provisioning/datasources # Dont set config in provisioning just to ensure that plugin works without any # extra config - ./config/plain:/etc/grafana/provisioning/plugins @@ -31,7 +32,7 @@ services: # allow anonymous admin so we don't have to set up a password to start testing - GF_AUTH_ANONYMOUS_ENABLED=false - GF_AUTH_BASIC_ENABLED=true - #- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + # - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin # skip login page # - GF_AUTH_DISABLE_LOGIN_FORM=true # We need to toggle external service accounts so that Grafana will get @@ -41,6 +42,8 @@ services: # disable alerting because it vomits logs - GF_ALERTING_ENABLED=false - GF_UNIFIED_ALERTING_ENABLED=false + - GF_LIVE_MAX_CONNECTIONS=0 + - GF_PLUGINS_DISABLE_PLUGINS=grafana-lokiexplore-app # Grafana image renderer - GF_RENDERING_SERVER_URL=http://renderer_plain:8081/render - GF_RENDERING_CALLBACK_URL=http://grafana_plain:${GF_SERVER_HTTP_PORT:-3000}/ @@ -49,6 +52,7 @@ services: # Set CI mode to remove header in report - __REPORTER_APP_CI_MODE=true - GF_REPORTER_PLUGIN_REMOTE_CHROME_URL=${GF_REPORTER_PLUGIN_REMOTE_CHROME_URL:-} + - 
GF_REPORTER_PLUGIN_NATIVE_RENDERER=${GF_REPORTER_PLUGIN_NATIVE_RENDERER:-false} renderer_plain: image: grafana/grafana-image-renderer:latest @@ -74,12 +78,13 @@ services: context: ../.config args: grafana_image: ${GRAFANA_IMAGE:-grafana-oss} - grafana_version: ${GRAFANA_VERSION:-11.3.0} + grafana_version: ${GRAFANA_VERSION:-11.4.0} ports: - 3443:${GF_SERVER_HTTP_PORT:-3000}/tcp volumes: - ../dist:/var/lib/grafana/plugins/mahendrapaipuri-dashboardreporter-app - ./dashboards:/etc/grafana/provisioning/dashboards + - ./datasources:/etc/grafana/provisioning/datasources - ./config/tls:/etc/grafana/provisioning/plugins - ./certs:/etc/grafana/tls - ./runtime/tls:/srv @@ -105,6 +110,8 @@ services: # disable alerting because it vomits logs - GF_ALERTING_ENABLED=false - GF_UNIFIED_ALERTING_ENABLED=false + - GF_LIVE_MAX_CONNECTIONS=0 + - GF_PLUGINS_DISABLE_PLUGINS=grafana-lokiexplore-app # TLS - GF_SERVER_PROTOCOL=https - GF_SERVER_CERT_KEY=/etc/grafana/tls/localhost.key @@ -117,6 +124,7 @@ services: # Set CI mode to remove header in report - __REPORTER_APP_CI_MODE=true - GF_REPORTER_PLUGIN_REMOTE_CHROME_URL=${GF_REPORTER_PLUGIN_REMOTE_CHROME_URL:-} + - GF_REPORTER_PLUGIN_NATIVE_RENDERER=${GF_REPORTER_PLUGIN_NATIVE_RENDERER:-false} - GF_REPORTER_PLUGIN_SKIP_TLS_CHECK=true renderer_tls: diff --git a/.github/workflows/step_e2e-tests.yml b/.github/workflows/step_e2e-tests.yml index 54d8554..353a2ed 100644 --- a/.github/workflows/step_e2e-tests.yml +++ b/.github/workflows/step_e2e-tests.yml @@ -18,6 +18,7 @@ jobs: - grafana-version: 10.3.0 remote-chrome-url: '' feature-flags: 'accessControlOnCall,idForwarding,externalServiceAccounts' + native-rendering: false # snapshots-folder: local-chrome name: local-chrome-10.3.0-with-features @@ -25,6 +26,7 @@ jobs: - grafana-version: 10.4.5 remote-chrome-url: '' feature-flags: 'accessControlOnCall,idForwarding,externalServiceAccounts' + native-rendering: true # snapshots-folder: local-chrome name: local-chrome-10.4.5-with-features @@ -33,6 
+35,7 @@ jobs: - grafana-version: 10.4.7 remote-chrome-url: ws://localhost:9222 feature-flags: 'externalServiceAccounts' + native-rendering: false # snapshots-folder: remote-chrome name: remote-chrome-10.4.7-without-features @@ -40,15 +43,25 @@ jobs: - grafana-version: 11.1.0 remote-chrome-url: ws://localhost:9222 feature-flags: 'accessControlOnCall,idForwarding,externalServiceAccounts' + native-rendering: false # snapshots-folder: remote-chrome name: remote-chrome-11.1.0-with-features - # Latest Grafana with local chrome - - grafana-version: 11.3.0 + # Latest Grafana with local chrome and grafana-image-renderer + - grafana-version: 11.4.0 remote-chrome-url: ws://localhost:9222 feature-flags: 'accessControlOnCall,idForwarding,externalServiceAccounts' + native-rendering: false # snapshots-folder: remote-chrome - name: local-chrome-11.3.0-with-features + name: local-chrome-11.4.0-with-features + + # Latest Grafana with local chrome and native-renderer + - grafana-version: 11.4.0 + remote-chrome-url: ws://localhost:9222 + feature-flags: 'accessControlOnCall,idForwarding,externalServiceAccounts' + native-rendering: true + # snapshots-folder: remote-chrome + name: local-chrome-11.4.0-with-features-native-renderer steps: - uses: actions/checkout@v4 @@ -76,6 +89,7 @@ jobs: env: GRAFANA_VERSION: ${{ matrix.grafana-version }} GF_REPORTER_PLUGIN_REMOTE_CHROME_URL: ${{ matrix.remote-chrome-url }} + GF_REPORTER_PLUGIN_NATIVE_RENDERER: ${{ matrix.native-rendering }} GF_FEATURE_TOGGLES_ENABLE: ${{ matrix.feature-flags }} run: | # Upload/Download artifacts wont preserve permissions diff --git a/docker-compose.yaml b/docker-compose.yaml index 008c5d4..88a0cdc 100755 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -40,20 +40,24 @@ services: # the token from a service account to read dashboards - GF_FEATURE_TOGGLES_ENABLE=${GF_FEATURE_TOGGLES_ENABLE:-accessControlOnCall,idForwarding,externalServiceAccounts} - 
GF_AUTH_MANAGED_SERVICE_ACCOUNTS_ENABLED=${GF_AUTH_MANAGED_SERVICE_ACCOUNTS_ENABLED:-true} - # disable alerting because it vomits logs + # disable alerting and Grafana live because it vomits logs - GF_ALERTING_ENABLED=false - GF_UNIFIED_ALERTING_ENABLED=false + - GF_LIVE_MAX_CONNECTIONS=0 + - GF_PLUGINS_DISABLE_PLUGINS=grafana-lokiexplore-app # Grafana image renderer - GF_RENDERING_SERVER_URL=http://renderer:8081/render - GF_RENDERING_CALLBACK_URL=http://grafana:${GF_SERVER_HTTP_PORT:-3000}/ - "GF_LOG_FILTERS=rendering:debug plugin.mahendrapaipuri-dashboardreporter-app:debug" + # Current plugin config + - GF_REPORTER_PLUGIN_NATIVE_RENDERER=${GF_REPORTER_PLUGIN_NATIVE_RENDERER:-false} renderer: image: grafana/grafana-image-renderer:latest environment: # Recommendation of grafana-image-renderer for optimal performance # https://grafana.com/docs/grafana/latest/setup-grafana/image-rendering/#configuration - RENDERING_MODE=clustered - - RENDERING_CLUSTERING_MODE=browser + - RENDERING_CLUSTERING_MODE=context - RENDERING_CLUSTERING_MAX_CONCURRENCY=5 - RENDERING_CLUSTERING_TIMEOUT=60 - IGNORE_HTTPS_ERRORS=true diff --git a/pkg/plugin/chrome/local.go b/pkg/plugin/chrome/local.go index a337f07..d0db1d8 100644 --- a/pkg/plugin/chrome/local.go +++ b/pkg/plugin/chrome/local.go @@ -141,10 +141,4 @@ func (i *LocalInstance) Close(logger log.Logger) { logger.Error("got error from cancel browser context", "error", err) } } - - if i.allocCtx != nil { - if err := chromedp.Cancel(i.allocCtx); err != nil { - logger.Error("got error from cancel browser allocator context", "error", err) - } - } } diff --git a/pkg/plugin/chrome/tab.go b/pkg/plugin/chrome/tab.go index 65725b3..f4d5bdc 100644 --- a/pkg/plugin/chrome/tab.go +++ b/pkg/plugin/chrome/tab.go @@ -102,6 +102,11 @@ func (t *Tab) Context() context.Context { return t.ctx } +// Target returns tab's target ID. 
+func (t *Tab) Target() *chromedp.Target { + return chromedp.FromContext(t.Context()).Target +} + // PrintToPDF returns chroms tasks that print the requested HTML into a PDF and returns the PDF stream handle. func (t *Tab) PrintToPDF(options PDFOptions, writer io.Writer) error { err := chromedp.Run(t.ctx, chromedp.Tasks{ diff --git a/pkg/plugin/config/settings.go b/pkg/plugin/config/settings.go index a195fdb..3592a38 100644 --- a/pkg/plugin/config/settings.go +++ b/pkg/plugin/config/settings.go @@ -41,6 +41,7 @@ type Config struct { MaxBrowserWorkers int `env:"GF_REPORTER_PLUGIN_MAX_BROWSER_WORKERS, overwrite" json:"maxBrowserWorkers"` MaxRenderWorkers int `env:"GF_REPORTER_PLUGIN_MAX_RENDER_WORKERS, overwrite" json:"maxRenderWorkers"` RemoteChromeURL string `env:"GF_REPORTER_PLUGIN_REMOTE_CHROME_URL, overwrite" json:"remoteChromeUrl"` + NativeRendering bool `env:"GF_REPORTER_PLUGIN_NATIVE_RENDERER, overwrite" json:"nativeRenderer"` IncludePanelIDs []string ExcludePanelIDs []string IncludePanelDataIDs []string @@ -141,10 +142,12 @@ func (c *Config) String() string { "Theme: %s; Orientation: %s; Layout: %s; Dashboard Mode: %s; "+ "Time Zone: %s; Time Format: %s; Encoded Logo: %s; "+ "Max Renderer Workers: %d; Max Browser Workers: %d; Remote Chrome Addr: %s; App URL: %s; "+ - "TLS Skip verify: %v; Included Panel IDs: %s; Excluded Panel IDs: %s Included Data for Panel IDs: %s", + "TLS Skip verify: %v; Included Panel IDs: %s; Excluded Panel IDs: %s Included Data for Panel IDs: %s; "+ + "Native Renderer: %v; Client Timeout: %d", c.Theme, c.Orientation, c.Layout, c.DashboardMode, c.TimeZone, c.TimeFormat, encodedLogo, c.MaxRenderWorkers, c.MaxBrowserWorkers, c.RemoteChromeURL, appURL, - c.SkipTLSCheck, includedPanelIDs, excludedPanelIDs, includeDataPanelIDs, + c.SkipTLSCheck, includedPanelIDs, excludedPanelIDs, includeDataPanelIDs, c.NativeRendering, + int(c.HTTPClientOptions.Timeouts.Timeout.Seconds()), ) } diff --git a/pkg/plugin/dashboard/dashboard.go 
b/pkg/plugin/dashboard/dashboard.go index 024cc3e..482725e 100644 --- a/pkg/plugin/dashboard/dashboard.go +++ b/pkg/plugin/dashboard/dashboard.go @@ -2,50 +2,58 @@ package dashboard import ( "context" + "embed" + "errors" "fmt" "net/http" "net/url" "strings" + "time" "github.com/grafana/grafana-plugin-sdk-go/backend/log" "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/chrome" "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/config" - "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/worker" + "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/helpers" ) -// Regex for parsing X and Y co-ordinates from CSS -// Scales for converting width and height to Grafana units. +// Embed the entire directory. // -// This is based on viewportWidth that we used in client.go which -// is 1952px. Stripping margin 32px we get 1920px / 24 = 80px -// height scale should be fine with 36px as width and aspect ratio -// should choose a height appropriately. -var ( - scales = map[string]float64{ - "width": 80, - "height": 36, - } -) +//go:embed js +var jsFS embed.FS // New creates a new instance of the Dashboard struct. func New(logger log.Logger, conf *config.Config, httpClient *http.Client, chromeInstance chrome.Instance, - pools worker.Pools, appURL, appVersion string, model *Model, authHeader http.Header, -) *Dashboard { + appURL, appVersion string, model *Model, authHeader http.Header, +) (*Dashboard, error) { + // Parse app URL + u, err := url.Parse(appURL) + if err != nil { + return nil, fmt.Errorf("failed to parse app URL: %w", errors.Unwrap(err)) + } + + // Read JS from embedded file + js, err := jsFS.ReadFile("js/panels.js") + if err != nil { + return nil, fmt.Errorf("failed to load JS: %w", err) + } + return &Dashboard{ logger, conf, httpClient, chromeInstance, - pools, - appURL, + u, appVersion, + string(js), model, authHeader, - } + }, nil } // GetData fetches dashboard related data. 
func (d *Dashboard) GetData(ctx context.Context) (*Data, error) { + defer helpers.TimeTrack(time.Now(), "dashboard data", d.logger) + // Make panels from loading the dashboard in a browser instance panels, err := d.panels(ctx) if err != nil { diff --git a/pkg/plugin/dashboard/data.go b/pkg/plugin/dashboard/data.go index bf9f6dd..28ab482 100644 --- a/pkg/plugin/dashboard/data.go +++ b/pkg/plugin/dashboard/data.go @@ -3,16 +3,18 @@ package dashboard import ( "context" "encoding/csv" - "errors" "fmt" "maps" + "net/url" "strconv" "strings" "time" "github.com/chromedp/cdproto/browser" + "github.com/chromedp/cdproto/runtime" "github.com/chromedp/chromedp" "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/chrome" + "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/helpers" ) // PanelCSV returns CSV data of a given panel. @@ -20,11 +22,13 @@ func (d *Dashboard) PanelCSV(_ context.Context, p Panel) (CSVData, error) { // Get panel CSV data URL panelURL := d.panelCSVURL(p) + defer helpers.TimeTrack(time.Now(), "fetch panel CSV data", d.logger, "fetcher", "native", "panel_id", p.ID, "url", panelURL.String()) + // Create a new tab tab := d.chromeInstance.NewTab(d.logger, d.conf) // Set a timeout for the tab // Fail-safe for newer Grafana versions, if css has been changed. 
- tab.WithTimeout(d.conf.HTTPClientOptions.Timeouts.Timeout) + tab.WithTimeout(2 * d.conf.HTTPClientOptions.Timeouts.Timeout) defer tab.Close(d.logger) headers := make(map[string]any) @@ -35,9 +39,7 @@ func (d *Dashboard) PanelCSV(_ context.Context, p Panel) (CSVData, error) { } } - d.logger.Debug("fetch table data via browser", "url", panelURL) - - err := tab.NavigateAndWaitFor(panelURL, headers, "networkIdle") + err := tab.NavigateAndWaitFor(panelURL.String(), headers, "networkIdle") if err != nil { return nil, fmt.Errorf("NavigateAndWaitFor: %w", err) } @@ -51,46 +53,34 @@ func (d *Dashboard) PanelCSV(_ context.Context, p Panel) (CSVData, error) { // Listen for download events. Downloading from JavaScript won't emit any network events. chromedp.ListenTarget(tab.Context(), func(event interface{}) { if eventDownloadWillBegin, ok := event.(*browser.EventDownloadWillBegin); ok { - d.logger.Debug("got CSV download URL", "url", eventDownloadWillBegin.URL) + d.logger.Debug("got CSV download URL", "panel_id", p.ID, "url", eventDownloadWillBegin.URL) // once we have the download URL, we can fetch the CSV data via JavaScript. blobURLCh <- eventDownloadWillBegin.URL } }) + js := fmt.Sprintf( + `waitForCSVData(version = '%s', timeout = %d);`, + d.appVersion, d.conf.HTTPClientOptions.Timeouts.Timeout.Milliseconds(), + ) + downTasks := chromedp.Tasks{ // Downloads needs to be allowed, otherwise the CSV request will be denied. // Allow download events to emit so we can get the download URL. browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName). WithDownloadPath("/dev/null"). 
WithEventsEnabled(true), - } - - if err = tab.RunWithTimeout(2*time.Second, downTasks); err != nil { - return nil, fmt.Errorf("error setting download behavior: %w", err) - } - - if err = tab.RunWithTimeout(2*time.Second, chromedp.WaitVisible(selDownloadCSVButton, chromedp.ByQuery)); err != nil { - return nil, fmt.Errorf("error waiting for download CSV button: %w", err) - } - - if err = tab.RunWithTimeout(2*time.Second, chromedp.Click(selInspectPanelDataTabExpandDataOptions, chromedp.ByQuery)); err != nil { - return nil, fmt.Errorf("error clicking on expand data options: %w", err) - } - - if err = tab.RunWithTimeout(1*time.Second, chromedp.Click(selInspectPanelDataTabApplyTransformationsToggle, chromedp.ByQuery)); err != nil && !errors.Is(err, context.DeadlineExceeded) { - return nil, fmt.Errorf("error clicking on apply transformations toggle: %w", err) - } - - if err = tab.RunWithTimeout(1*time.Second, chromedp.Click(selInspectPanelDataTabApplyTransformationsToggle, chromedp.ByQuery)); err != nil && !errors.Is(err, context.DeadlineExceeded) { - return nil, fmt.Errorf("error clicking on apply transformations toggle: %w", err) + chromedp.Evaluate(d.jsContent, nil), + chromedp.Evaluate(js, nil, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithAwaitPromise(true) + }), } // Run all tasks in a goroutine. // If an error occurs, it will be sent to the errCh channel. // If a element can't be found, a timeout will occur and the context will be canceled. 
go func() { - task := chromedp.Evaluate(clickDownloadCSVButton, nil) - if err := tab.Run(task); err != nil { + if err := tab.Run(downTasks); err != nil { errCh <- fmt.Errorf("error fetching CSV URL from browser %s: %w", panelURL, err) } }() @@ -120,7 +110,7 @@ func (d *Dashboard) PanelCSV(_ context.Context, p Panel) (CSVData, error) { chrome.WithAwaitPromise, ) - if err := tab.RunWithTimeout(45*time.Second, task); err != nil { + if err := tab.RunWithTimeout(d.conf.HTTPClientOptions.Timeouts.Timeout, task); err != nil { return nil, fmt.Errorf("error fetching CSV data from URL from browser %s: %w", panelURL, err) } @@ -144,13 +134,18 @@ func (d *Dashboard) PanelCSV(_ context.Context, p Panel) (CSVData, error) { } // panelCSVURL returns URL to fetch panel's CSV data. -func (d *Dashboard) panelCSVURL(p Panel) string { +func (d *Dashboard) panelCSVURL(p Panel) *url.URL { values := maps.Clone(d.model.Dashboard.Variables) values.Add("theme", d.conf.Theme) values.Add("viewPanel", p.ID) values.Add("inspect", p.ID) values.Add("inspectTab", "data") + // Copy appURL and append the dashboard endpoint to its existing path. + // Overwriting Path would drop the prefix when Grafana is served from + // a sub-path (e.g. https://host/grafana), which the previous string + // concatenation preserved. + panelURL := *d.appURL + panelURL.Path = fmt.Sprintf("%s/d/%s/_", strings.TrimRight(panelURL.Path, "/"), d.model.Dashboard.UID) + panelURL.RawQuery = values.Encode() + // Get Panel API endpoint - return fmt.Sprintf("%s/d/%s/_?%s", d.appURL, d.model.Dashboard.UID, values.Encode()) + return &panelURL } diff --git a/pkg/plugin/dashboard/js/panels.js b/pkg/plugin/dashboard/js/panels.js new file mode 100644 index 0000000..d233be3 --- /dev/null +++ b/pkg/plugin/dashboard/js/panels.js @@ -0,0 +1,142 @@ +// Javascript to expand row panels and wait until queries +// and panels are fully loaded on the current Grafana +// dashboard + +// Base backoff duration in ms +const baseDelayMsecs = 10; + +// Define a timer to wait until next try +const timer = ms => new Promise(res => setTimeout(res, ms)); + +// Panel data +const panelData = selector => [...document.querySelectorAll('[' + selector + ']')].map((e) => ({ "x": e.getBoundingClientRect().x, "y": 
e.getBoundingClientRect().y, "width": e.getBoundingClientRect().width, "height": e.getBoundingClientRect().height, "title": e.innerText.split('\n')[0], "id": e.getAttribute(selector) }))
+
+/**
+ * Semantic Versioning Comparing
+ * #see https://semver.org/
+ * #see https://stackoverflow.com/a/65687141/456536
+ * #see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Collator/Collator#options
+ */
+function semverCompare(a, b) {
+  if (a.startsWith(b + "-")) {return -1}
+  if (b.startsWith(a + "-")) {return 1}
+  return a.localeCompare(b, undefined, { numeric: true, sensitivity: "case", caseFirst: "upper" })
+}
+
+// Wait for queries to finish and panels to load data
+const waitForQueriesAndVisualizations = async (version = '11.3.0', mode = 'default', timeout = 30000) => {
+  // Strip an optional leading 'v'; split('v')[1] is undefined for unprefixed versions such as the default
+  const ver = version.replace(/^v/, '');
+
+  // Set selector based on version (localeCompare only guarantees the sign of its result)
+  let selector;
+  if (semverCompare(ver, '11.3.0') < 0) {
+    selector = 'data-panelid';
+  } else {
+    selector = 'data-viz-panel-key'
+  }
+
+  // Expand row panels if mode is full
+  if (mode === 'full') {
+    // For Grafana <= v10
+    [...document.getElementsByClassName('dashboard-row--collapsed')].map((e) => e.getElementsByClassName('dashboard-row__title pointer')[0].click());
+    // For Grafana > v10 and <= v11
+    [...document.querySelectorAll("[data-testid='dashboard-row-container']")].map((e) => [...e.querySelectorAll("[aria-expanded=false]")].map((e) => e.click()));
+    // For Grafana >= v11.3
+    [...document.querySelectorAll("[aria-label='Expand row']")].map((e) => e.click());
+  }
+
+  // Always scroll to bottom of the page
+  window.scrollTo(0, document.body.scrollHeight);
+
+  // Panel count should be unchanged for minStableSizeIterations times
+  let countStableSizeIterations = 0;
+  const minStableSizeIterations = 3;
+
+  // Initialise parameters
+  let lastPanels = [];
+  let checkCounts = 1;
+  const start = Date.now();
+
+  while (Date.now() - start < timeout) {
+    // Get current number of rendered panels
+    let currentPanels = document.querySelectorAll("[class$='panel-content']");
+
+    // If current panels and last panels are same, increment iterator
+    if (lastPanels.length !== 0 && currentPanels.length === lastPanels.length) {
+      countStableSizeIterations++;
+    } else {
+      countStableSizeIterations = 0; // reset the counter
+    }
+
+    // If panel count is stable for minStableSizeIterations, return. We assume that
+    // the dashboard has loaded with all panels
+    if (countStableSizeIterations >= minStableSizeIterations) {
+      return panelData(selector);
+    }
+
+    // If not, back off and retry, never sleeping past the timeout deadline
+    lastPanels = currentPanels;
+    await timer(Math.min(baseDelayMsecs * 2 ** checkCounts, Math.max(0, timeout - (Date.now() - start))));
+    checkCounts++;
+  }
+
+  return panelData(selector);
+};
+
+// Wait for CSV download button to appear
+const waitForCSVDownloadButton = async () => {
+  // Initialise parameters
+  let checkCounts = 1;
+  const start = Date.now();
+
+  // Wait for download button
+  while (Date.now() - start < 1000) {
+    // Get all buttons on inspect panel
+    let buttons = document.querySelectorAll('div[aria-label="Panel inspector Data content"] button[type="button"]');
+
+    // Ensure download CSV button exists in buttons
+    for (let i = 0; i < buttons.length; i++) {
+      if (buttons[i].innerText === 'Download CSV') {
+        buttons[i].click();
+        return;
+      }
+    }
+
+    // If not, wait and retry
+    await timer(baseDelayMsecs * 2 ** checkCounts);
+    checkCounts++;
+  }
+
+  return;
+};
+
+// Ensures format data toggle is checked to apply all transformations
+const checkFormatDataToggle = async () => {
+  // Get all toggles on inspect panel
+  let toggles = document.querySelectorAll('div[data-testid="dataOptions"] input#formatted-data-toggle');
+
+  // Ensure format data toggle is checked
+  for (let i = 0; i < toggles.length; i++) {
+    if (!toggles[i].checked) {
+      toggles[i].click();
+      return;
+    }
+  }
+
+  return;
+};
+
+// Waits for CSV data to be ready to download
+const waitForCSVData = async (version = '11.3.0', timeout 
= 30000) => { + // First wait for panel to load data + await waitForQueriesAndVisualizations(version, 'default', timeout); + + // Ensure format data toggle is checked + await checkFormatDataToggle(); + + // Wait for CSV download button and click it + await waitForCSVDownloadButton(); + + return; +}; diff --git a/pkg/plugin/dashboard/panels.go b/pkg/plugin/dashboard/panels.go index 59e96f8..0e43797 100644 --- a/pkg/plugin/dashboard/panels.go +++ b/pkg/plugin/dashboard/panels.go @@ -6,88 +6,27 @@ import ( "fmt" "math" "strings" + "time" "github.com/chromedp/cdproto/runtime" "github.com/chromedp/chromedp" - "golang.org/x/mod/semver" + "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/helpers" ) -// Javascripts vars. +// Regex for parsing X and Y co-ordinates from CSS +// Scales for converting width and height to Grafana units. +// +// This is based on viewportWidth that we used in client.go which +// is 1952px. Stripping margin 32px we get 1920px / 24 = 80px +// height scale should be fine with 36px as width and aspect ratio +// should choose a height appropriately. var ( - // JS to uncollapse rows for different Grafana versions. - // Seems like executing JS corresponding to v10 on v11 or v11 on v10 - // does not have any side-effect, so we will always execute both of them. This - // avoids more logic to detect Grafana version. - unCollapseRowsJS = map[string]string{ - "v10": `[...document.getElementsByClassName('dashboard-row--collapsed')].map((e) => e.getElementsByClassName('dashboard-row__title pointer')[0].click())`, - "v11": `[...document.querySelectorAll("[data-testid='dashboard-row-container']")].map((e) => [...e.querySelectorAll("[aria-expanded=false]")].map((e) => e.click()))`, - "v11.3": `[...document.querySelectorAll("[aria-label='Expand row']")].map((e) => e.click())`, + scales = map[string]float64{ + "width": 80, + "height": 36, } - - // dashboardDataJS is a javascript to get dashboard related data. 
- dashboardDataJS = `[...document.querySelectorAll('[%[1]s]')].map((e)=>({"x": e.getBoundingClientRect().x, "y": e.getBoundingClientRect().y, "width": e.getBoundingClientRect().width, "height": e.getBoundingClientRect().height, "title": e.innerText.split('\n')[0], "id": e.getAttribute("%[1]s")}))` - - // waitPanelsJS is a javascript to wait for all panels to load. - // Seems like in Grafana v11.3.0+, panels are "lazily" loading. We need to scroll to the rows/panels for them to be visible. - // Even after expanding rows, we need to wait for panels to load data for our `dashboardDataJS` to get the panels data. - // It is a bit useless to wait for data to load in panels just to get the list of active panels and their positions but seems - // like we do not have many options here. - waitPanelsJS = `const loadPanels = async(sel = 'data-viz-panel-key', timeout = 30000) => { - // Define a timer to wait until next try - let timer = ms => new Promise(res => setTimeout(res, ms)); - - // Always scroll to bottom of the page - window.scrollTo(0, document.body.scrollHeight); - - // Wait duration between retries - const waitDurationMsecs = 1000; - - // Maximum number of checks based on timeout - const maxChecks = timeout / waitDurationMsecs; - - // Initialise parameters - let lastPanels = []; - let checkCounts = 1; - - // Panel count should be unchanged for minStableSizeIterations times - let countStableSizeIterations = 0; - const minStableSizeIterations = 3; - - while (checkCounts++ <= maxChecks) { - // Get current number of panels - let currentPanels = document.querySelectorAll('[' + sel + ']'); - - // If current panels and last panels are same, increment iterator - if (lastPanels.length !== 0 && currentPanels.length === lastPanels.length) { - countStableSizeIterations++; - } else { - countStableSizeIterations = 0; // reset the counter - } - - // If panel count is stable for minStableSizeIterations, return. 
We assume that - // the dashboard has loaded with all panels - if (countStableSizeIterations >= minStableSizeIterations) { - return; - } - - // If not, wait and retry - lastPanels = currentPanels; - await timer(waitDurationMsecs); - } - - return; - };` -) - -// Tables related javascripts. -const ( - selDownloadCSVButton = `div[aria-label="Panel inspector Data content"] button[type="button"]` - selInspectPanelDataTabExpandDataOptions = `div[role='dialog'] button[aria-expanded=false]` - selInspectPanelDataTabApplyTransformationsToggle = `div[data-testid="dataOptions"] input:not(#excel-toggle):not(#formatted-data-toggle) + label` ) -var clickDownloadCSVButton = fmt.Sprintf(`[...document.querySelectorAll('%s')].map((e)=>(e.click()))`, selDownloadCSVButton) - // Browser vars. var ( // We must set a view port to browser to ensure chromedp (or chromium) @@ -114,7 +53,7 @@ var ( // panels fetches dashboard panels from Grafana chromium browser instance. func (d *Dashboard) panels(ctx context.Context) ([]Panel, error) { // Fetch dashboard data from browser - dashboardData, err := d.panelData(ctx) + dashboardData, err := d.panelMetaData(ctx) if err != nil { return nil, fmt.Errorf("failed to get dashboard data from browser: %w", err) } @@ -125,11 +64,13 @@ func (d *Dashboard) panels(ctx context.Context) ([]Panel, error) { return d.createPanels(dashboardData) } -// panelData fetches dashboard panels data from Grafana chromium browser instance. -func (d *Dashboard) panelData(_ context.Context) ([]interface{}, error) { +// panelMetaData fetches dashboard panels metadata from Grafana chromium browser instance. 
+func (d *Dashboard) panelMetaData(_ context.Context) ([]interface{}, error) { // Get dashboard URL dashURL := fmt.Sprintf("%s/d/%s/_?%s", d.appURL, d.model.Dashboard.UID, d.model.Dashboard.Variables.Encode()) + defer helpers.TimeTrack(time.Now(), "fetch dashboard panels metadata", d.logger, "url", dashURL) + // Create a new tab tab := d.chromeInstance.NewTab(d.logger, d.conf) tab.WithTimeout(2 * d.conf.HTTPClientOptions.Timeouts.Timeout) @@ -150,48 +91,23 @@ func (d *Dashboard) panelData(_ context.Context) ([]interface{}, error) { tasks := make(chromedp.Tasks, 0) - // JS attribute for fetching dashboard data has changed in v11.3.0 - var dashDataJS string - if semver.Compare(d.appVersion, "v11.3.0") == -1 { - dashDataJS = fmt.Sprintf(dashboardDataJS, "data-panelid") - } else { - dashDataJS = fmt.Sprintf(dashboardDataJS, "data-viz-panel-key") - - // Set viewport. Seems like it is crucial for Grafana v11.3.0+ - tasks = append(tasks, chromedp.EmulateViewport(viewportWidth, viewportHeight)) - - // Add `loadPanels()` func to tab - tasks = append(tasks, chromedp.Evaluate(waitPanelsJS, nil)) - - // Wait for all panels to lazy load - tasks = append(tasks, chromedp.Evaluate(`loadPanels();`, nil, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { - return p.WithAwaitPromise(true) - })) - } - - // If full dashboard mode is requested, add js that uncollapses rows - if d.conf.DashboardMode == "full" { - for _, jsExpr := range unCollapseRowsJS { - tasks = append(tasks, chromedp.Evaluate(jsExpr, nil)) - } - - // For Grafana v11.3.0+, wait for all expanded panels to load - if semver.Compare(d.appVersion, "v11.3.0") > -1 { - tasks = append(tasks, chromedp.Evaluate(`loadPanels();`, nil, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { - return p.WithAwaitPromise(true) - })) - } - } - // Fetch dashboard data var dashboardData []interface{} // var buf []byte + js := fmt.Sprintf( + `waitForQueriesAndVisualizations(version = '%s', mode = '%s', timeout = %d);`, + 
d.appVersion, d.conf.DashboardMode, d.conf.HTTPClientOptions.Timeouts.Timeout.Milliseconds(), + ) + // JS that will fetch dashboard model tasks = append(tasks, chromedp.Tasks{ - chromedp.Evaluate(dashDataJS, &dashboardData), - // chromedp.CaptureScreenshot(&buf), + chromedp.Evaluate(d.jsContent, nil), + chromedp.EmulateViewport(viewportWidth, viewportHeight), + chromedp.Evaluate(js, &dashboardData, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithAwaitPromise(true) + }), }...) if err := tab.Run(tasks); err != nil { diff --git a/pkg/plugin/dashboard/panels_test.go b/pkg/plugin/dashboard/panels_test.go index 6cd5427..48e9d78 100644 --- a/pkg/plugin/dashboard/panels_test.go +++ b/pkg/plugin/dashboard/panels_test.go @@ -17,7 +17,6 @@ import ( "github.com/grafana/grafana-plugin-sdk-go/backend/log" "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/chrome" "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/config" - "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/worker" . 
"github.com/smartystreets/goconvey/convey" ) @@ -97,17 +96,11 @@ func TestDashboardFetchWithLocalChrome(t *testing.T) { HTTPClientOptions: httpclient.Options{Timeouts: &httpclient.DefaultTimeoutOptions}, } - ctx := context.Background() - workerPools := worker.Pools{ - worker.Browser: worker.New(ctx, 6), - worker.Renderer: worker.New(ctx, 2), - } - dash := New( + dash, err := New( log.NewNullLogger(), &conf, http.DefaultClient, chromeInstance, - workerPools, ts.URL, "v11.4.0", &Model{Dashboard: struct { @@ -125,7 +118,12 @@ func TestDashboardFetchWithLocalChrome(t *testing.T) { backend.CookiesHeaderName: []string{"cookie"}, }, ) - d, err := dash.panelData(context.Background()) + + Convey("New dashboard should receive no errors", func() { + So(err, ShouldBeNil) + }) + + d, err := dash.panelMetaData(context.Background()) Convey("It should receive no errors", func() { So(err, ShouldBeNil) @@ -200,17 +198,11 @@ func TestDashboardFetchWithRemoteChrome(t *testing.T) { HTTPClientOptions: httpclient.Options{Timeouts: &httpclient.DefaultTimeoutOptions}, } - ctx := context.Background() - workerPools := worker.Pools{ - worker.Browser: worker.New(ctx, 6), - worker.Renderer: worker.New(ctx, 2), - } - dash := New( + dash, err := New( log.NewNullLogger(), &conf, http.DefaultClient, chromeInstance, - workerPools, ts.URL, "v11.4.0", &Model{Dashboard: struct { @@ -228,7 +220,12 @@ func TestDashboardFetchWithRemoteChrome(t *testing.T) { backend.CookiesHeaderName: []string{"cookie"}, }, ) - d, err := dash.panelData(context.Background()) + + Convey("New dashboard should receive no errors", func() { + So(err, ShouldBeNil) + }) + + d, err := dash.panelMetaData(context.Background()) Convey("It should receive no errors", func() { So(err, ShouldBeNil) @@ -248,12 +245,11 @@ func TestDashboardFetchWithRemoteChrome(t *testing.T) { func TestDashboardCreatePanels(t *testing.T) { Convey("When creating panels for Dashboard", t, func() { - dash := New( + dash, err := New( log.NewNullLogger(), nil, 
nil, nil, - worker.Pools{}, "http://localhost:3000", "v11.4.0", &Model{Dashboard: struct { @@ -270,10 +266,14 @@ func TestDashboardCreatePanels(t *testing.T) { nil, ) + Convey("New dashboard should receive no errors", func() { + So(err, ShouldBeNil) + }) + dashDataString := `[{"width":940,"height":258,"x":0,"y":0,"id":"12"},{"width":940,"height":258,"x":940,"y":0,"id":"26"},{"width":940,"height":258,"x":0,"y":0,"id":"27"}]` var dashData []interface{} - err := json.Unmarshal([]byte(dashDataString), &dashData) + err = json.Unmarshal([]byte(dashDataString), &dashData) Convey("setup dashboard data unmarshal", func() { So(err, ShouldBeNil) diff --git a/pkg/plugin/dashboard/renderer.go b/pkg/plugin/dashboard/renderer.go index 6d08e90..1084120 100644 --- a/pkg/plugin/dashboard/renderer.go +++ b/pkg/plugin/dashboard/renderer.go @@ -8,19 +8,96 @@ import ( "io" "maps" "net/http" + "net/url" "strconv" "time" + + "github.com/chromedp/cdproto/runtime" + "github.com/chromedp/chromedp" + "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/helpers" ) var getPanelRetrySleepTime = time.Duration(10) * time.Second // PanelPNG returns encoded PNG image of a given panel. func (d *Dashboard) PanelPNG(ctx context.Context, p Panel) (PanelImage, error) { + if d.conf.NativeRendering { + return d.panelPNGNativeRenderer(ctx, p) + } + + return d.panelPNGImageRenderer(ctx, p) +} + +// panelPNGNativeRenderer returns panel PNG data by capturing screenshot of panel in browser. 
+func (d *Dashboard) panelPNGNativeRenderer(_ context.Context, p Panel) (PanelImage, error) { + // Get panel URL + panelURL := d.panelPNGURL(p, false) + + defer helpers.TimeTrack(time.Now(), "fetch panel PNG", d.logger, "panel_id", p.ID, "renderer", "native", "url", panelURL.String()) + + // Create a new tab + tab := d.chromeInstance.NewTab(d.logger, d.conf) + tab.WithTimeout(2 * d.conf.HTTPClientOptions.Timeouts.Timeout) + defer tab.Close(d.logger) + + headers := make(map[string]any) + + for name, values := range d.authHeader { + for _, value := range values { + headers[name] = value + } + } + + err := tab.NavigateAndWaitFor(panelURL.String(), headers, "networkIdle") + if err != nil { + return PanelImage{}, fmt.Errorf("NavigateAndWaitFor: %w", err) + } + + var buf []byte + + tasks := make(chromedp.Tasks, 0) + + js := fmt.Sprintf( + `waitForQueriesAndVisualizations(version = '%s', timeout = %d);`, + d.appVersion, d.conf.HTTPClientOptions.Timeouts.Timeout.Milliseconds(), + ) + + tasks = append(tasks, chromedp.Tasks{ + chromedp.Evaluate(d.jsContent, nil), + chromedp.EmulateViewport(d.panelDims(p)), + chromedp.Evaluate(js, nil, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithAwaitPromise(true) + }), + chromedp.CaptureScreenshot(&buf), + }...) + + if err := tab.Run(tasks); err != nil { + return PanelImage{}, fmt.Errorf("error fetching panel PNG from browser %s: %w", panelURL.String(), err) + } + + sb := &bytes.Buffer{} + + encoder := base64.NewEncoder(base64.StdEncoding, sb) + + if _, err = encoder.Write(buf); err != nil { + return PanelImage{}, fmt.Errorf("error reading data of panel PNG: %w", err) + } + + return PanelImage{ + Image: sb.String(), + MimeType: "image/png", + }, nil +} + +// panelPNGImageRenderer returns panel PNG data by making API requests to grafana-image-renderer. 
+func (d *Dashboard) panelPNGImageRenderer(ctx context.Context, p Panel) (PanelImage, error) { // Get panel render URL - panelURL := d.panelPNGURL(p, d.model.Dashboard.UID) + panelURL := d.panelPNGURL(p, true) + + defer helpers.TimeTrack(time.Now(), "fetch panel PNG", d.logger, "panel_id", p.ID, "renderer", "grafana-image-renderer", "url", panelURL.String()) // Create a new request for panel - req, err := http.NewRequestWithContext(ctx, http.MethodGet, panelURL, nil) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, panelURL.String(), nil) if err != nil { return PanelImage{}, fmt.Errorf("error creating request for %s: %w", panelURL, err) } @@ -83,33 +160,52 @@ func (d *Dashboard) PanelPNG(ctx context.Context, p Panel) (PanelImage, error) { }, nil } -// panelPNGURL returns the URL to fetch panel PNd. -func (d *Dashboard) panelPNGURL(p Panel, dashUID string) string { +// panelPNGURL returns the URL to fetch panel PNG. +func (d *Dashboard) panelPNGURL(p Panel, render bool) *url.URL { values := maps.Clone(d.model.Dashboard.Variables) values.Add("theme", d.conf.Theme) values.Add("panelId", p.ID) - if d.conf.TimeZone != "" { + if d.conf.TimeZone != "" && values.Get("timezone") == "" { values.Add("timezone", d.conf.TimeZone) } + // Get panel dimensions + w, h := d.panelDims(p) + values.Add("width", strconv.FormatInt(w, 10)) + values.Add("height", strconv.FormatInt(h, 10)) + + // If render is true call grafana-image-renderer API URL + var renderer string + if render { + renderer = "render/" + } + + // Make a copy of appURL + panelURL := *d.appURL + panelURL.Path = fmt.Sprintf("/%sd-solo/%s/_", renderer, d.model.Dashboard.UID) + panelURL.RawQuery = values.Encode() + + // Get Panel API endpoint + return &panelURL +} + +// panelDims returns width and height of panel based on layout. +func (d *Dashboard) panelDims(p Panel) (int64, int64) { // If using a grid layout we use 100px for width and 36px for height scalind. 
// Grafana panels are fitted into 24 units width and height units are said to // 30px in docs but 36px seems to be better. // // In simple layout we create panels with 1000x500 resolution always and include // them one in each page of report + var width, height int64 if d.conf.Layout == "grid" { - width := int(p.GridPos.W * 100) - height := int(p.GridPos.H * 36) - - values.Add("width", strconv.Itoa(width)) - values.Add("height", strconv.Itoa(height)) + width = int64(p.GridPos.W * 100) + height = int64(p.GridPos.H * 36) } else { - values.Add("width", "1000") - values.Add("height", "500") + width = 1000 + height = 500 } - // Get Panel API endpoint - return fmt.Sprintf("%s/render/d-solo/%s/_?%s", d.appURL, dashUID, values.Encode()) + return width, height } diff --git a/pkg/plugin/dashboard/renderer_test.go b/pkg/plugin/dashboard/renderer_test.go index 6f93be8..22d16bf 100644 --- a/pkg/plugin/dashboard/renderer_test.go +++ b/pkg/plugin/dashboard/renderer_test.go @@ -12,7 +12,6 @@ import ( "github.com/grafana/grafana-plugin-sdk-go/backend/log" "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/chrome" "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/config" - "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/worker" . 
"github.com/smartystreets/goconvey/convey" ) @@ -42,18 +41,11 @@ func TestFetchPanelPNG(t *testing.T) { variables.Add("from", "now-1h") variables.Add("to", "now") - ctx := context.Background() - workerPools := worker.Pools{ - worker.Browser: worker.New(ctx, 6), - worker.Renderer: worker.New(ctx, 2), - } - - dash := New( + dash, err := New( log.NewNullLogger(), &conf, http.DefaultClient, &chrome.LocalInstance{}, - workerPools, ts.URL, "v11.1.0", &Model{Dashboard: struct { @@ -72,7 +64,12 @@ func TestFetchPanelPNG(t *testing.T) { backend.OAuthIdentityTokenHeaderName: []string{"Bearer token"}, }, ) - _, err := dash.PanelPNG(context.Background(), Panel{ID: "44", Type: "singlestat", Title: "title", GridPos: GridPos{}}) + + Convey("New dashboard should receive no errors", func() { + So(err, ShouldBeNil) + }) + + _, err = dash.PanelPNG(context.Background(), Panel{ID: "44", Type: "singlestat", Title: "title", GridPos: GridPos{}}) Convey("It should receives no errors", func() { So(err, ShouldBeNil) @@ -108,12 +105,11 @@ func TestFetchPanelPNG(t *testing.T) { // Use grid layout conf.Layout = "grid" - dash = New( + dash, err = New( log.NewNullLogger(), &conf, http.DefaultClient, &chrome.LocalInstance{}, - workerPools, ts.URL, "v11.1.0", &Model{Dashboard: struct { @@ -133,6 +129,10 @@ func TestFetchPanelPNG(t *testing.T) { }, ) + Convey("New dashboard should receive no errors using grid layout", func() { + So(err, ShouldBeNil) + }) + _, err = dash.PanelPNG(context.Background(), Panel{ID: "44", Type: "graph", Title: "title", GridPos: GridPos{H: 6, W: 24}}) Convey("It should receives no errors using grid layout", func() { diff --git a/pkg/plugin/dashboard/types.go b/pkg/plugin/dashboard/types.go index c365ed5..2cb2a49 100644 --- a/pkg/plugin/dashboard/types.go +++ b/pkg/plugin/dashboard/types.go @@ -10,7 +10,6 @@ import ( "github.com/grafana/grafana-plugin-sdk-go/backend/log" "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/chrome" 
"github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/config" - "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/worker" ) // Dashboard represents a Grafana dashboard resource. @@ -19,9 +18,9 @@ type Dashboard struct { conf *config.Config httpClient *http.Client chromeInstance chrome.Instance - workerPools worker.Pools - appURL string + appURL *url.URL appVersion string + jsContent string model *Model authHeader http.Header } diff --git a/pkg/plugin/helpers/helpers.go b/pkg/plugin/helpers/helpers.go new file mode 100644 index 0000000..d45a063 --- /dev/null +++ b/pkg/plugin/helpers/helpers.go @@ -0,0 +1,14 @@ +package helpers + +import ( + "time" + + "github.com/grafana/grafana-plugin-sdk-go/backend/log" +) + +// TimeTrack tracks execution time of each function. +func TimeTrack(start time.Time, name string, logger log.Logger, args ...interface{}) { + elapsed := time.Since(start) + args = append(args, "duration", elapsed.String()) + logger.Debug(name, args...) 
+} diff --git a/pkg/plugin/report/report.go b/pkg/plugin/report/report.go index c63ebcb..9e785fc 100644 --- a/pkg/plugin/report/report.go +++ b/pkg/plugin/report/report.go @@ -19,6 +19,7 @@ import ( "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/chrome" "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/config" "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/dashboard" + "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/helpers" "github.com/mahendrapaipuri/grafana-dashboard-reporter-app/pkg/plugin/worker" ) @@ -51,6 +52,8 @@ func New(logger log.Logger, conf *config.Config, httpClient *http.Client, chrome } func (r *Report) Generate(ctx context.Context, writer http.ResponseWriter) error { + defer helpers.TimeTrack(time.Now(), "report generation", r.logger) + // Get panel data from dashboard dashboardData, err := r.dashboard.GetData(ctx) if err != nil { @@ -87,6 +90,8 @@ func (r *Report) Generate(ctx context.Context, writer http.ResponseWriter) error // populatePanels populates the panels with PNG and tabular data. 
func (r *Report) populatePanels(ctx context.Context, dashboardData *dashboard.Data) error { + defer helpers.TimeTrack(time.Now(), "panel PNGs and/or data generation", r.logger) + // Get the indexes of PNG panels that need to be included in the report pngPanels := selectPanels(dashboardData.Panels, r.conf.IncludePanelIDs, r.conf.ExcludePanelIDs, true) @@ -98,33 +103,33 @@ func (r *Report) populatePanels(ctx context.Context, dashboardData *dashboard.Da wg := sync.WaitGroup{} for idx, panel := range dashboardData.Panels { - if slices.Contains(tablePanels, idx) { + if slices.Contains(pngPanels, idx) { wg.Add(1) - r.pools[worker.Browser].Do(func() { + r.pools[worker.Renderer].Do(func() { defer wg.Done() - panelData, err := r.dashboard.PanelCSV(ctx, panel) + panelPNG, err := r.dashboard.PanelPNG(ctx, panel) if err != nil { - errorCh <- fmt.Errorf("failed to fetch CSV data for panel %s: %w", panel.ID, err) + errorCh <- fmt.Errorf("failed to fetch PNG data for panel %s: %w", panel.ID, err) } - dashboardData.Panels[idx].CSVData = panelData + dashboardData.Panels[idx].EncodedImage = panelPNG }) } - if slices.Contains(pngPanels, idx) { + if slices.Contains(tablePanels, idx) { wg.Add(1) - r.pools[worker.Renderer].Do(func() { + r.pools[worker.Browser].Do(func() { defer wg.Done() - panelPNG, err := r.dashboard.PanelPNG(ctx, panel) + panelData, err := r.dashboard.PanelCSV(ctx, panel) if err != nil { - errorCh <- fmt.Errorf("failed to fetch PNG data for panel %s: %w", panel.ID, err) + errorCh <- fmt.Errorf("failed to fetch CSV data for panel %s: %w", panel.ID, err) } - dashboardData.Panels[idx].EncodedImage = panelPNG + dashboardData.Panels[idx].CSVData = panelData }) } } @@ -246,6 +251,8 @@ func (r *Report) generateHTMLFile(dashboardData *dashboard.Data) (HTML, error) { // renderPDF renders HTML page into PDF using Chromium. 
func (r *Report) renderPDF(htmlReport HTML, writer io.Writer) error { + defer helpers.TimeTrack(time.Now(), "pdf rendering", r.logger) + // Create a new tab tab := r.chromeInstance.NewTab(r.logger, r.conf) defer tab.Close(r.logger) diff --git a/pkg/plugin/resources.go b/pkg/plugin/resources.go index c9996ed..c1c4641 100644 --- a/pkg/plugin/resources.go +++ b/pkg/plugin/resources.go @@ -335,17 +335,22 @@ func (app *App) handleReport(w http.ResponseWriter, req *http.Request) { } } - grafanaDashboard := dashboard.New( + grafanaDashboard, err := dashboard.New( ctxLogger, &conf, app.httpClient, app.chromeInstance, - app.workerPools, grafanaAppURL, app.grafanaSemVer, model, authHeader, ) + if err != nil { + ctxLogger.Error("failed to create a new dashboard", "err", err) + http.Error(w, "error generating report", http.StatusInternalServerError) + + return + } ctxLogger.Info(fmt.Sprintf("generate report using %s chrome", app.chromeInstance.Name())) diff --git a/pkg/plugin/worker/pool.go b/pkg/plugin/worker/pool.go index 306b37a..0c9f48d 100644 --- a/pkg/plugin/worker/pool.go +++ b/pkg/plugin/worker/pool.go @@ -19,13 +19,13 @@ const ( ) func New(ctx context.Context, maxWorker int) *Pool { - queue := make(chan func(), maxWorker) - ctx, cancel := context.WithCancel(ctx) - if maxWorker <= 0 { maxWorker = runtime.NumCPU() } + queue := make(chan func(), maxWorker) + ctx, cancel := context.WithCancel(ctx) + for range maxWorker { go func() { for { diff --git a/provisioning/plugins/app.yaml b/provisioning/plugins/app.yaml index 6a4c157..2909e79 100755 --- a/provisioning/plugins/app.yaml +++ b/provisioning/plugins/app.yaml @@ -162,3 +162,13 @@ apps: # chrome instance # remoteChromeUrl: '' + + # Render Panel PNGs natively using current plugin. + # When set to `true`, the plugin generates panel PNGs natively without using + # `grafana-image-renderer`. Thus, if it is set to `true`, there is no need + # to install `grafana-image-renderer`. 
However, `chromium` must be available + # for report and panel PNG generation + # + # THIS IS HIGHLY EXPERIMENTAL FEATURE. + # + nativeRenderer: false diff --git a/scripts/e2e-tests.sh b/scripts/e2e-tests.sh index 2cec12c..b8d0601 100755 --- a/scripts/e2e-tests.sh +++ b/scripts/e2e-tests.sh @@ -2,7 +2,7 @@ # Constants API_PATH="api/plugins/mahendrapaipuri-dashboardreporter-app/resources/report" -DASH_UID="fdlwjnyim1la8f" +DASH_UID="b3228ada-fd89-4aed-8605-d5f7b95aa237" # CLI ARGS VARIANT="$1" @@ -10,12 +10,12 @@ VARIANT="$1" if [[ "$VARIANT" == "plain" ]]; then GRAFANA_PROTOCOL="http" GRAFANA_PORT="3080" - QUERY_PARAMS="layout=simple&orientation=portrait&dashboardMode=default&var-testvar0=All&var-testvar1=foo&var-testvar2=1" + QUERY_PARAMS='from=now-60d&to=now&var-job=$__all&var-instance=$__all&var-interval=1h&var-ds=PBFA97CFB590B2093&layout=simple&orientation=portrait&dashboardMode=default' REPORT_NAME="default" else GRAFANA_PROTOCOL="https" GRAFANA_PORT="3443" - QUERY_PARAMS="layout=grid&orientation=landscape&dashboardMode=full&from=now-5m&to=now&var-testvar0=All&var-testvar1=foo&var-testvar2=1" + QUERY_PARAMS='from=now-60d&to=now&var-job=$__all&var-instance=$__all&var-interval=1h&var-ds=PBFA97CFB590B2093&layout=grid&orientation=landscape&dashboardMode=full' REPORT_NAME="alternative" fi diff --git a/src/README.md b/src/README.md index c1fc173..5dc95aa 100755 --- a/src/README.md +++ b/src/README.md @@ -47,10 +47,10 @@ generating and sending reports automatically, they should look into official plu However, it is still possible to install this plugin using `grafana-cli` by overriding `pluginUrl` by using URL from [releases](https://github.com/mahendrapaipuri/grafana-dashboard-reporter-app/releases). 
-For example following command will install plugin version `v1.6.3` +For example following command will install plugin version `1.7.1` ```bash -grafana-cli --pluginUrl https://github.com/mahendrapaipuri/grafana-dashboard-reporter-app/releases/download/v1.6.3/mahendrapaipuri-dashboardreporter-app-1.6.3.zip plugins install mahendrapaipuri-dashboardreporter-app +VERSION=1.7.1 grafana-cli --pluginUrl "https://github.com/mahendrapaipuri/grafana-dashboard-reporter-app/releases/download/v${VERSION}/mahendrapaipuri-dashboardreporter-app-${VERSION}.zip" plugins install mahendrapaipuri-dashboardreporter-app ``` Similarly, `nightly` version can be installed suing @@ -590,6 +590,23 @@ error messages will be as follows: the host. In that case, we advise to install `chromium` on the machine which will install all the dependent libraries. +- On Ubuntu, for a more hassle-free experience, install `google-chrome` +from [DEB package](https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb) +instead of installing `chromium` from `snap`. + +- If Grafana server is running inside a systemd service file, sometimes users might +see errors as follows: + + ```bash + couldn't create browser context: chrome failed to start:\nchrome_crashpad_handler: --database is required\nTry 'chrome_crashpad_handler --help' for more information.\n[147301:147301:0102/092026.518581:ERROR:socket.cc(120)] recvmsg: Connection reset by peer (104)\n" + ``` + + This is due to `google-chrome`/`chromium` not able to create user profile + directories. A solution is to set environment variables + `XDG_CONFIG_HOME=/tmp/.chrome` and `XDG_CACHE_HOME=/tmp/.chrome` on the Grafana + process. If users do not wish to use `/tmp`, any folder where Grafana process + has write permissions can be used. + - If you get `permission denied` response when generating a report, it is due to the user not having `View` permissions on the dashboard that they are attempting to generate the report.