diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..e6efdeb --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +/logs +/cache diff --git a/boot.go b/boot.go index 06568ed..189dcd9 100644 --- a/boot.go +++ b/boot.go @@ -388,39 +388,40 @@ func (t *Tool) createServiceTokens() error { done := make(map[string]struct{}) return t.topology.Walk(func(n Node) error { - for _, s := range n.Services { - if _, ok := done[s.Name]; ok { - continue - } + if n.Service == nil { + return nil + } + if _, ok := done[n.Service.Name]; ok { + return nil + } - token := &api.ACLToken{ - Description: "service--" + s.Name, - Local: false, - ServiceIdentities: []*api.ACLServiceIdentity{ - &api.ACLServiceIdentity{ - ServiceName: s.Name, - }, + token := &api.ACLToken{ + Description: "service--" + n.Service.Name, + Local: false, + ServiceIdentities: []*api.ACLServiceIdentity{ + &api.ACLServiceIdentity{ + ServiceName: n.Service.Name, }, - } + }, + } - token, err := consulfunc.CreateOrUpdateToken(t.clientDC1, token) - if err != nil { - return err - } + token, err := consulfunc.CreateOrUpdateToken(t.clientDC1, token) + if err != nil { + return err + } - t.logger.Info("service token created", - "service", s.Name, - "token", token.SecretID, - ) + t.logger.Info("service token created", + "service", n.Service.Name, + "token", token.SecretID, + ) - if err := t.cache.SaveValue("service-token--"+s.Name, token.SecretID); err != nil { - return err - } + if err := t.cache.SaveValue("service-token--"+n.Service.Name, token.SecretID); err != nil { + return err + } - t.setToken("service", s.Name, token.SecretID) + t.setToken("service", n.Service.Name, token.SecretID) - done[s.Name] = struct{}{} - } + done[n.Service.Name] = struct{}{} return nil }) } @@ -437,6 +438,34 @@ func (t *Tool) writeCentralConfigs() error { ce := client.ConfigEntries() entries := t.config.ConfigEntries + if t.config.Monitor.Prometheus { + found := false + for _, entry := range entries { + if entry.GetKind() != api.ProxyDefaults { + continue + } + if entry.GetName() != api.ProxyConfigGlobal { + continue + } + ce := entry.(*api.ProxyConfigEntry) + if ce.Config == nil { + ce.Config = make(map[string]interface{}) + } + // hardcoded address of prometheus container + ce.Config["envoy_prometheus_bind_addr"] = "0.0.0.0:9102" + found = true + break + } + if !found { + entries = append(entries, &api.ProxyConfigEntry{ + Kind: api.ProxyDefaults, + Name: api.ProxyConfigGlobal, + Config: map[string]interface{}{ + "envoy_prometheus_bind_addr": "0.0.0.0:9102", + }, + }) + } + } for _, entry := range entries { if _, _, err := ce.Set(entry, nil); err != nil { @@ -484,19 +513,21 @@ func (t *Tool) writeCentralConfigs() error { func (t *Tool) writeServiceRegistrationFiles() error { return t.topology.Walk(func(n Node) error { - for _, s := range n.Services { - var buf bytes.Buffer - if err := serviceRegistrationT.Execute(&buf, &s); err != nil { - return err - } - regHCL := buf.String() + if n.Service == nil { + return nil + } - filename := "servicereg__" + n.Name + "__" + s.Name + ".hcl" - if err := t.cache.WriteStringFile(filename, regHCL); err != nil { - return err - } - t.logger.Info("Generated", "filename", filename) + var buf bytes.Buffer + if err := serviceRegistrationT.Execute(&buf, n.Service); err != nil { + return err + } + regHCL := buf.String() + + filename := "servicereg__" + n.Name + "__" + n.Service.Name + ".hcl" + if err := t.cache.WriteStringFile(filename, regHCL); err != nil { + return err } + t.logger.Info("Generated", "filename", filename) return nil }) } @@ -523,23 +554,24 @@ func intentionKey(i *api.Intention) string { func (t *Tool) createIntentions() error { return t.topology.Walk(func(n Node) error { - for _, s := range n.Services { - i := &api.Intention{ - SourceName: s.Name, - DestinationName: s.UpstreamName, - Action: api.IntentionActionAllow, - } - - oi, err := consulfunc.CreateOrUpdateIntention(t.clientDC1, i) - if err != nil { - return err - } + if n.Service == nil { + return nil + } - t.logger.Info("created/updated intention", "src", oi.SourceName, - "dst", oi.DestinationName, "action", oi.Action) + i := &api.Intention{ + SourceName: n.Service.Name, + DestinationName: n.Service.UpstreamName, + Action: api.IntentionActionAllow, + } - return nil + oi, err := consulfunc.CreateOrUpdateIntention(t.clientDC1, i) + if err != nil { + return err } + + t.logger.Info("created/updated intention", "src", oi.SourceName, + "dst", oi.DestinationName, "action", oi.Action) + return nil }) } @@ -643,6 +675,7 @@ services = [ {{- if .UpstreamDatacenter }} datacenter = "{{.UpstreamDatacenter}}" {{- end }} +{{ .UpstreamExtraHCL }} }, ] } diff --git a/connect_service_dashboard.json b/connect_service_dashboard.json new file mode 100644 index 0000000..d82720e --- /dev/null +++ b/connect_service_dashboard.json @@ -0,0 +1,1457 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": 2, + "iteration": 1565378975112, + "links": [], + "panels": [ + { + "content": "
\n \"Consul   \n svc/$service\n
", + "gridPos": { + "h": 2.4, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 20, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "Prometheus", + "decimals": null, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 2.4 + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(envoy_http_downstream_rq_xx{response_code_class!=\"5\",local_cluster=\"$service\",envoy_http_conn_manager_prefix=\"public_listener_http\"}[1m])) / sum(rate(envoy_http_downstream_rq_xx{local_cluster=\"$service\",envoy_http_conn_manager_prefix=\"public_listener_http\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0.9,.99", + "title": "SUCCESS RATE", + "transparent": true, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "decimals": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 2.4 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": " RPS", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(envoy_http_downstream_rq_total{local_cluster=\"$service\",envoy_http_conn_manager_prefix=\"public_listener_http\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "REQUEST RATE", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "decimals": null, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 2.4 + }, + "id": 81, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(irate(envoy_cluster_upstream_rq_time_bucket{consul_service=\"$service\"}[30s])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Source: $local_cluster", + "refId": "A" + } + ], + "thresholds": "", + "title": "P95 LATENCY", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "format": "bps", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 18, + "y": 2.4 + }, + "id": 89, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(184, 119, 217, 0.18)", + "full": false, + "lineColor": "#B877D9", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(envoy_http_downstream_cx_rx_bytes_total[1m]))*8 + sum(irate(envoy_cluster_upstream_cx_rx_bytes_total{local_cluster=\"$service\"}[1m])) * 8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "rx", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Data Rx", + "transparent": true, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#FADE2A" + ], + "datasource": "Prometheus", + "format": "bps", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 2.4 + }, + "id": 90, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(250, 222, 42, 0.11)", + "full": false, + "lineColor": "#FADE2A", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(envoy_http_downstream_cx_tx_bytes_total[1m]))*8 + sum(irate(envoy_cluster_upstream_cx_tx_bytes_total{local_cluster=\"$service\"}[1m])) * 8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "tx", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Data Tx", + "transparent": true, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "
\n INBOUND TRAFFIC\n
", + "gridPos": { + "h": 2.2, + "w": 24, + "x": 0, + "y": 7.4 + }, + "id": 17, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 9.600000000000001 + }, + "id": 87, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(envoy_http_downstream_rq_xx{local_cluster=\"$service\",envoy_http_conn_manager_prefix=\"public_listener_http\"}[1m])) by (local_cluster, response_code_class)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{response_code_class}}xx", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "RESPONSES PER MIN BY HTTP STATUS", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 9.600000000000001 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(envoy_http_downstream_rq_total{local_cluster=\"$service\",envoy_http_conn_manager_prefix=\"public_listener_http\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "svc/{{consul_service}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 9.600000000000001 + }, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(irate(envoy_cluster_upstream_rq_time_bucket{consul_service=\"$service\"}[30s])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "p50", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(irate(envoy_cluster_upstream_rq_time_bucket{consul_service=\"$service\"}[30s])) by (le))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "p95", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(irate(envoy_cluster_upstream_rq_time_bucket{consul_service=\"$service\"}[30s])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "p99", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "content": "
\n INBOUND TRAFFIC BY SOURCE\n
", + "gridPos": { + "h": 2.2, + "w": 24, + "x": 0, + "y": 16.6 + }, + "id": 32, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 18.8 + }, + "id": 77, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(envoy_cluster_upstream_rq_xx{response_code_class!=\"5\",consul_service=\"$service\"}[30s])) by (consul_service, local_cluster) / sum(irate(envoy_cluster_upstream_rq_xx{consul_service=\"$service\"}[30s])) by (consul_service, local_cluster)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "source:{{local_cluster}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 18.8 + }, + "id": 78, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(envoy_cluster_upstream_rq_total{consul_service=\"$service\"}[30s])) by (consul_service, local_cluster)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "src: {{local_cluster}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 18.8 + }, + "id": 79, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(envoy_cluster_upstream_rq_time_bucket{consul_service=\"$service\"}[30s])) by (le, consul_service, local_cluster))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "src: {{local_cluster}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P95 LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "content": "
\n OUTBOUND TRAFFIC BY UPSTREAM\n
", + "gridPos": { + "h": 2.2, + "w": 24, + "x": 0, + "y": 25.8 + }, + "id": 82, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 28 + }, + "id": 83, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(envoy_cluster_upstream_rq_xx{response_code_class!=\"5\",local_cluster=\"$service\",consul_target!=\"\"}[30s])) by (local_cluster, consul_target) / sum(irate(envoy_cluster_upstream_rq_xx{local_cluster=\"$service\",consul_target!=\"\"}[30s])) by (local_cluster, consul_target)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{consul_target}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 28 + }, + "id": 84, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(envoy_cluster_upstream_rq_total{local_cluster=\"$service\",consul_target!=\"\"}[30s])) by (local_cluster, consul_target)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "upstream: {{consul_target}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 28 + }, + "id": 85, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(envoy_cluster_upstream_rq_time_bucket{local_cluster=\"$service\",consul_target!=\"\"}[30s])) by (le, local_cluster, consul_target))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "upstream: {{consul_target}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P95 LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "5s", + "schemaVersion": 19, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "ping", + "value": "ping" + }, + "datasource": "Prometheus", + "definition": "label_values(envoy_http_rq_total, local_cluster)", + "hide": 0, + "includeAll": false, + "label": "Service", + "multi": false, + "name": "service", + "options": [], + "query": "label_values(envoy_http_rq_total, local_cluster)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "label_values(envoy_cluster_upstream_rq_completed{consul_service=\"$service\"}, local_cluster)", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "inbound", + "options": [], + "query": "label_values(envoy_cluster_upstream_rq_completed{consul_service=\"$service\"}, local_cluster)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "label_values(envoy_cluster_external_upstream_rq_completed{local_cluster=\"$service\"}, consul_service)", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "outbound", + "options": [], + "query": "label_values(envoy_cluster_external_upstream_rq_completed{local_cluster=\"$service\"}, consul_service)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Consul 1.6 SNI Dashboard", + "uid": "consul-service", + "version": 7 +} diff --git a/gen.go b/gen.go index c88e2b0..2d398ee 100644 --- a/gen.go +++ b/gen.go @@ -7,7 +7,10 @@ import ( "flag" "fmt" "io/ioutil" + "net" "os" + "path/filepath" + "sort" "strconv" "strings" "text/template" @@ -38,12 +41,34 @@ func (t *Tool) commandGen() error { }) } + if err := t.generateComposeFile(); err != nil { + return err + } + + if t.config.Monitor.Prometheus { + if err := t.generatePrometheusConfigFile(); err != nil { + return err + } + if err := t.generateGrafanaConfigFiles(); err != nil { + return err + } + } + + return nil +} + +func (t *Tool) generateComposeFile() error { info := composeInfo{ Config: t.config, RuntimeConfig: t.runtimeConfig, } - err = t.topology.Walk(func(node Node) error { + if t.config.Monitor.Prometheus { + info.Volumes = append(info.Volumes, "prometheus-data") + info.Volumes = append(info.Volumes, "grafana-data") + } + + err := t.topology.Walk(func(node Node) error { podName := node.Name + "-pod" podHCL, err := t.generateAgentHCL(node) @@ -94,25 +119,6 @@ func (t *Tool) commandGen() error { return t.updateFileIfDifferent(out.Bytes(), "docker-compose.yml", 0644) } -func (t *Tool) updateFileIfDifferent(body []byte, path string, perm os.FileMode) error { - prev, err := ioutil.ReadFile(path) - if err != nil { - if !os.IsNotExist(err) { - return err - } - t.logger.Info("writing new file", "path", path) - } else { - // loaded - if bytes.Equal(body, prev) { - return nil - } - t.logger.Info("file has changed", "path", path) - } - - _, err = safeio.WriteToFile(bytes.NewReader(body), path, perm) - return err -} - type composeInfo struct { Config *Config RuntimeConfig RuntimeConfig @@ -154,6 +160,29 @@ volumes: # https://yipee.io/2017/06/getting-kubernetes-pod-features-using-native-docker-commands/ services: +{{- if .Config.Monitor.Prometheus }} + prometheus: + image: prom/prometheus:latest + restart: always + dns: 8.8.8.8 + volumes: + - 'prometheus-data:/prometheus-data' + - './cache/prometheus.yml:/etc/prometheus/prometheus.yml:ro' + networks: + consul: + ipv4_address: '10.0.100.1' + + grafana: + network_mode: 'service:prometheus' + image: grafana/grafana:latest + restart: always + init: true + volumes: + - 'grafana-data:/var/lib/grafana' + - './cache/grafana-prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml:ro' + - './cache/grafana.ini:/etc/grafana/grafana.ini:ro' +{{- end }} + {{- range .Pods }} {{.PodName}}: container_name: '{{.PodName}}' @@ -186,7 +215,9 @@ services: func (t *Tool) generatePingPongYAML(podName string, node Node) (string, error) { var extraYAML bytes.Buffer - for _, svc := range node.Services { + if node.Service != nil { + svc := node.Service + switch svc.Name { case "ping", "pong": default: @@ -237,6 +268,7 @@ func (t *Tool) generatePingPongYAML(podName string, node Node) (string, error) { return "", err } } + return extraYAML.String(), nil } @@ -353,6 +385,7 @@ func (t *Tool) generateAgentHCL(node Node) (string, error) { Server: node.Server, GossipKey: t.runtimeConfig.GossipKey, TLS: t.config.Encryption.TLS, + Prometheus: t.config.Monitor.Prometheus, } if node.Server { @@ -389,6 +422,7 @@ type consulAgentConfigInfo struct { GossipKey string TLS bool TLSFilePrefix string + Prometheus bool } var consulAgentConfigT = template.Must(template.New("consul-agent-config").Parse(` @@ -411,6 +445,12 @@ retry_join_wan = [ {{.RetryJoinWAN}} ] server = {{.Server}} ui = true +{{ if .Prometheus }} +telemetry { + prometheus_retention_time = "168h" +} +{{- end }} + {{ if .GossipKey }} encrypt = "{{.GossipKey}}" {{- end }} @@ -469,3 +509,225 @@ func indent(s string, n int) string { return buf.String() } + +func (t *Tool) generatePrometheusConfigFile() error { + type kv struct { + Key, Val string + } + type job struct { + Name string + MetricsPath string + Params map[string][]string + Targets []string + Labels []kv + } + + jobs := make(map[string]*job) + add := func(j *job) { + prev, ok := jobs[j.Name] + if ok { + // only retain targets + prev.Targets = append(prev.Targets, j.Targets...) + j = prev + } else { + sort.Slice(j.Labels, func(a, b int) bool { + return j.Labels[a].Key < j.Labels[b].Key + }) + jobs[j.Name] = j + } + sort.Strings(j.Targets) + } + + err := t.topology.Walk(func(node Node) error { + if node.Server { + add(&job{ + Name: "consul-servers-" + node.Datacenter, + MetricsPath: "/v1/agent/metrics", + Params: map[string][]string{ + "format": []string{"prometheus"}, + "token": []string{t.runtimeConfig.AgentMasterToken}, + }, + Targets: []string{ + net.JoinHostPort(node.IPAddress, "8500"), + }, + Labels: []kv{ + {"dc", node.Datacenter}, + // {"node", node.Name}, + {"role", "consul-server"}, + }, + }) + } else { + add(&job{ + Name: "consul-clients-" + node.Datacenter, + MetricsPath: "/v1/agent/metrics", + Params: map[string][]string{ + "format": []string{"prometheus"}, + "token": []string{t.runtimeConfig.AgentMasterToken}, + }, + Targets: []string{ + net.JoinHostPort(node.IPAddress, "8500"), + }, + Labels: []kv{ + {"dc", node.Datacenter}, + // {"node", node.Name}, + {"role", "consul-client"}, + }, + }) + + if node.MeshGateway { + add(&job{ + Name: "mesh-gateways-" + node.Datacenter, + MetricsPath: "/metrics", + Targets: []string{ + net.JoinHostPort(node.IPAddress, "9102"), + }, + Labels: []kv{ + {"dc", node.Datacenter}, + // {"node", node.Name}, + {"role", "mesh-gateway"}, + }, + }) + } else if node.Service != nil { + add(&job{ + Name: node.Service.Name + "-proxy", + MetricsPath: "/metrics", + Targets: []string{ + net.JoinHostPort(node.IPAddress, "9102"), + }, + Labels: []kv{ + {"dc", node.Datacenter}, + // {"node", node.Name}, + {"role", node.Service.Name + "-proxy"}, + }, + }) + } + } + + return nil + }) + if err != nil { + return err + } + + info := struct { + Jobs []*job + }{} + for _, j := range jobs { + info.Jobs = append(info.Jobs, j) + } + sort.Slice(info.Jobs, func(i, j int) bool { + return info.Jobs[i].Name < info.Jobs[j].Name + }) + + var out bytes.Buffer + if err := prometheusConfigT.Execute(&out, &info); err != nil { + return err + } + + return t.updateFileIfDifferent(out.Bytes(), "cache/prometheus.yml", 0644) +} + +var prometheusConfigT = template.Must(template.New("prometheus").Parse(` +# my global config +global: + scrape_interval: 5s + evaluation_interval: 5s + +# Alertmanager configuration +alerting: + alertmanagers: + - static_configs: + - targets: + # - alertmanager:9093 + +# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. +rule_files: + # - "first_rules.yml" + # - "second_rules.yml" + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: + - job_name: 'prometheus' + + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. + + static_configs: + - targets: ['localhost:9090'] + +{{- range .Jobs }} + + - job_name: {{.Name}} + metrics_path: "{{.MetricsPath}}" + params: +{{- range $k, $vl := .Params }} + {{ $k }}: +{{- range $vl }} + - {{ . }} +{{- end}} +{{- end}} + static_configs: + - targets: +{{- range .Targets }} + - "{{ . }}" +{{- end }} + labels: +{{- range .Labels }} + {{ .Key }}: "{{ .Val }}" +{{- end }} +{{- end }} +`)) + +func (t *Tool) generateGrafanaConfigFiles() error { + files := map[string]string{ + "grafana-prometheus.yml": ` +apiVersion: 1 + +datasources: +- name: Prometheus + type: prometheus + access: proxy + url: http://localhost:9090 + isDefault: true + version: 1 + editable: false +`, + "grafana.ini": ` +[auth.anonymous] +enabled = true + +# Organization name that should be used for unauthenticated users +org_name = Main Org. + +# Role for unauthenticated users, other valid values are 'Editor' and 'Admin' +org_role = Admin +`, + } + + for name, body := range files { + if err := t.updateFileIfDifferent([]byte(body), filepath.Join("cache", name), 0644); err != nil { + return err + } + } + return nil +} + +func (t *Tool) updateFileIfDifferent(body []byte, path string, perm os.FileMode) error { + prev, err := ioutil.ReadFile(path) + if err != nil { + if !os.IsNotExist(err) { + return err + } + t.logger.Info("writing new file", "path", path) + } else { + // loaded + if bytes.Equal(body, prev) { + return nil + } + t.logger.Info("file has changed", "path", path) + } + + _, err = safeio.WriteToFile(bytes.NewReader(body), path, perm) + return err +} diff --git a/go.mod b/go.mod index 2d35216..ce9c7de 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/armon/go-metrics v0.0.0-20190423201044-2801d9688273 // indirect github.com/google/btree v1.0.0 // indirect github.com/hashicorp/consul/api v1.1.1-0.20190807163521-3a3086ecd22d + github.com/hashicorp/go-cleanhttp v0.5.1 github.com/hashicorp/go-hclog v0.9.2 github.com/hashicorp/go-msgpack v0.5.4 // indirect github.com/hashicorp/go-sockaddr v1.0.2 // indirect diff --git a/sidecar-boot.sh b/sidecar-boot.sh index 6833d66..ef80fed 100755 --- a/sidecar-boot.sh +++ b/sidecar-boot.sh @@ -68,6 +68,15 @@ case "${mode}" in # whitespace in the middle so :shrug: token="${token//[[:space:]]}" + while : ; do + if consul acl token read -token-file "${token_file}" -self &> /dev/null ; then + break + fi + + echo "waiting for ACLs to work..." + sleep 0.1 + done + echo "Registering service..." consul services register -token-file "${token_file}" "${service_register_file}" diff --git a/tool_config.go b/tool_config.go index c2b1000..43543db 100644 --- a/tool_config.go +++ b/tool_config.go @@ -33,6 +33,7 @@ type Config struct { Encryption ConfigEncryption `hcl:"encryption"` Kubernetes ConfigKubernetes `hcl:"kubernetes"` Envoy ConfigEnvoy `hcl:"envoy"` + Monitor ConfigMonitor `hcl:"monitor"` Topology ConfigTopology `hcl:"topology"` InitialMasterToken string `hcl:"initial_master_token"` RawConfigEntries []string `hcl:"config_entries"` @@ -48,6 +49,9 @@ type ConfigKubernetes struct { type ConfigEnvoy struct { LogLevel string `hcl:"log_level"` } +type ConfigMonitor struct { + Prometheus bool `hcl:"prometheus"` +} type ConfigTopology struct { Servers ConfigTopologyDatacenter `hcl:"servers"` @@ -63,6 +67,7 @@ type ConfigTopologyDatacenter struct { type ConfigTopologyNodeConfig struct { UpstreamName string `hcl:"upstream_name"` UpstreamDatacenter string `hcl:"upstream_datacenter"` + UpstreamExtraHCL string `hcl:"upstream_extra_hcl"` ServiceMeta map[string]string `hcl:"service_meta"` // key -> val MeshGateway bool `hcl:"mesh_gateway"` UseBuiltinProxy bool `hcl:"use_builtin_proxy"` diff --git a/topology.go b/topology.go index 026bd8c..48ea2e3 100644 --- a/topology.go +++ b/topology.go @@ -64,6 +64,7 @@ func InferTopology(c *Config) (*Topology, error) { svc := Service{ Port: 8080, UpstreamLocalPort: 9090, + UpstreamExtraHCL: nodeConfig.UpstreamExtraHCL, Meta: nodeConfig.Meta(), } if idx%2 == 1 { @@ -81,7 +82,7 @@ func InferTopology(c *Config) (*Topology, error) { svc.UpstreamDatacenter = nodeConfig.UpstreamDatacenter } - node.Services = []Service{svc} + node.Service = &svc } addNode(node) @@ -156,23 +157,24 @@ func (t *Topology) WalkSilent(f func(n Node)) { } type Node struct { - Datacenter string `hcl:"datacenter"` - Name string `hcl:"name,key"` - Server bool `hcl:"server"` - IPAddress string `hcl:"ip_address"` - Services []Service `hcl:"service"` - MeshGateway bool `hcl:"mesh_gateway"` - UseBuiltinProxy bool `hcl:"use_builtin_proxy"` - Index int `hcl:"-"` + Datacenter string + Name string + Server bool + IPAddress string + Service *Service + MeshGateway bool + UseBuiltinProxy bool + Index int } func (n *Node) TokenName() string { return "agent--" + n.Name } type Service struct { - Name string `hcl:"name,key"` - Port int `hcl:"port"` - UpstreamName string `hcl:"upstream_name"` - UpstreamDatacenter string `hcl:"upstream_datacenter"` - UpstreamLocalPort int `hcl:"upstream_local_port"` - Meta map[string]string `hcl:"meta"` + Name string + Port int + UpstreamName string + UpstreamDatacenter string + UpstreamLocalPort int + UpstreamExtraHCL string + Meta map[string]string }