diff --git a/.gitignore b/.gitignore index 2c00bb3d..253498f8 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ build/* data/*.yaml !.github/workflows/*.yaml -helm/easeprobe/charts/ \ No newline at end of file +helm/easeprobe/charts/ +vendor/ diff --git a/probe/base/base.go b/probe/base/base.go index cb26c45c..817729c5 100644 --- a/probe/base/base.go +++ b/probe/base/base.go @@ -230,19 +230,23 @@ func (d *DefaultProbe) ExportMetrics() { time = d.ProbeResult.Stat.DownTime } + // Add endpoint label according to ProbeKind(tcp/http/ping/host/...) d.metrics.TotalCnt.With(prometheus.Labels{ - "name": d.ProbeName, - "status": d.ProbeResult.Status.String(), + "name": d.ProbeName, + "status": d.ProbeResult.Status.String(), + "endpoint": d.ProbeResult.Endpoint, }).Set(float64(cnt)) d.metrics.TotalTime.With(prometheus.Labels{ - "name": d.ProbeName, - "status": d.ProbeResult.Status.String(), + "name": d.ProbeName, + "status": d.ProbeResult.Status.String(), + "endpoint": d.ProbeResult.Endpoint, }).Set(float64(time.Seconds())) d.metrics.Duration.With(prometheus.Labels{ - "name": d.ProbeName, - "status": d.ProbeResult.Status.String(), + "name": d.ProbeName, + "status": d.ProbeResult.Status.String(), + "endpoint": d.ProbeResult.Endpoint, }).Set(float64(d.ProbeResult.RoundTripTime.Milliseconds())) status := ServiceUp // up @@ -250,11 +254,13 @@ func (d *DefaultProbe) ExportMetrics() { status = ServiceDown // down } d.metrics.Status.With(prometheus.Labels{ - "name": d.ProbeName, + "name": d.ProbeName, + "endpoint": d.ProbeResult.Endpoint, }).Set(float64(status)) d.metrics.SLA.With(prometheus.Labels{ - "name": d.ProbeName, + "name": d.ProbeName, + "endpoint": d.ProbeResult.Endpoint, }).Set(float64(d.ProbeResult.SLAPercent())) } diff --git a/probe/base/metrics.go b/probe/base/metrics.go index d5b951f0..db733d90 100644 --- a/probe/base/metrics.go +++ b/probe/base/metrics.go @@ -37,14 +37,14 @@ func newMetrics(subsystem, name string) *metrics { namespace := global.GetEaseProbe().Name return &metrics{ TotalCnt: metric.NewGauge(namespace, subsystem, name, "total", - "Total Probed Counts", []string{"name", "status"}), + "Total Probed Counts", []string{"name", "status", "endpoint"}), TotalTime: metric.NewGauge(namespace, subsystem, name, "total_time", - "Total Time(Seconds) of Status", []string{"name", "status"}), + "Total Time(Seconds) of Status", []string{"name", "status", "endpoint"}), Duration: metric.NewGauge(namespace, subsystem, name, "duration", - "Probe Duration", []string{"name", "status"}), + "Probe Duration", []string{"name", "status", "endpoint"}), Status: metric.NewGauge(namespace, subsystem, name, "status", - "Probe Status", []string{"name"}), + "Probe Status", []string{"name", "endpoint"}), SLA: metric.NewGauge(namespace, subsystem, name, "sla", - "Probe SLA", []string{"name"}), + "Probe SLA", []string{"name", "endpoint"}), } } diff --git a/probe/http/http.go b/probe/http/http.go index ab7d3d07..c8f4fab8 100644 --- a/probe/http/http.go +++ b/probe/http/http.go @@ -279,47 +279,56 @@ func (h *HTTP) ExportMetrics(resp *http.Response) { len = int(resp.ContentLength) } h.metrics.StatusCode.With(prometheus.Labels{ - "name": h.ProbeName, - "status": fmt.Sprintf("%d", code), + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + "endpoint": h.ProbeResult.Endpoint, }).Inc() h.metrics.ContentLen.With(prometheus.Labels{ - "name": h.ProbeName, - "status": fmt.Sprintf("%d", code), + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + "endpoint": h.ProbeResult.Endpoint, }).Set(float64(len)) h.metrics.DNSDuration.With(prometheus.Labels{ - "name": h.ProbeName, - "status": fmt.Sprintf("%d", code), + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + "endpoint": h.ProbeResult.Endpoint, }).Set(toMS(h.traceStats.dnsTook)) h.metrics.ConnectDuration.With(prometheus.Labels{ - "name": h.ProbeName, - "status": fmt.Sprintf("%d", code), + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + "endpoint": h.ProbeResult.Endpoint, }).Set(toMS(h.traceStats.connTook)) h.metrics.TLSDuration.With(prometheus.Labels{ - "name": h.ProbeName, - "status": fmt.Sprintf("%d", code), + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + "endpoint": h.ProbeResult.Endpoint, }).Set(toMS(h.traceStats.tlsTook)) h.metrics.SendDuration.With(prometheus.Labels{ - "name": h.ProbeName, - "status": fmt.Sprintf("%d", code), + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + "endpoint": h.ProbeResult.Endpoint, }).Set(toMS(h.traceStats.sendTook)) h.metrics.WaitDuration.With(prometheus.Labels{ - "name": h.ProbeName, - "status": fmt.Sprintf("%d", code), + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + "endpoint": h.ProbeResult.Endpoint, }).Set(toMS(h.traceStats.waitTook)) h.metrics.TransferDuration.With(prometheus.Labels{ - "name": h.ProbeName, - "status": fmt.Sprintf("%d", code), + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + "endpoint": h.ProbeResult.Endpoint, }).Set(toMS(h.traceStats.transferTook)) h.metrics.TotalDuration.With(prometheus.Labels{ - "name": h.ProbeName, - "status": fmt.Sprintf("%d", code), + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + "endpoint": h.ProbeResult.Endpoint, }).Set(toMS(h.traceStats.totalTook)) } diff --git a/probe/http/metrics.go b/probe/http/metrics.go index bc65f8a5..17aeccc8 100644 --- a/probe/http/metrics.go +++ b/probe/http/metrics.go @@ -41,22 +41,22 @@ func newMetrics(subsystem, name string) *metrics { namespace := global.GetEaseProbe().Name return &metrics{ StatusCode: metric.NewCounter(namespace, subsystem, name, "status_code", - "HTTP Status Code", []string{"name", "status"}), + "HTTP Status Code", []string{"name", "status", "endpoint"}), ContentLen: metric.NewGauge(namespace, subsystem, name, "content_len", - "HTTP Content Length", []string{"name", "status"}), + "HTTP Content Length", []string{"name", "status", "endpoint"}), DNSDuration: metric.NewGauge(namespace, subsystem, name, "dns_duration", - "DNS Duration", []string{"name", "status"}), + "DNS Duration", []string{"name", "status", "endpoint"}), ConnectDuration: metric.NewGauge(namespace, subsystem, name, "connect_duration", - "TCP Connection Duration", []string{"name", "status"}), + "TCP Connection Duration", []string{"name", "status", "endpoint"}), TLSDuration: metric.NewGauge(namespace, subsystem, name, "tls_duration", - "TLS Duration", []string{"name", "status"}), + "TLS Duration", []string{"name", "status", "endpoint"}), SendDuration: metric.NewGauge(namespace, subsystem, name, "send_duration", - "Send Duration", []string{"name", "status"}), + "Send Duration", []string{"name", "status", "endpoint"}), WaitDuration: metric.NewGauge(namespace, subsystem, name, "wait_duration", - "Wait Duration", []string{"name", "status"}), + "Wait Duration", []string{"name", "status", "endpoint"}), TransferDuration: metric.NewGauge(namespace, subsystem, name, "transfer_duration", - "Transfer Duration", []string{"name", "status"}), + "Transfer Duration", []string{"name", "status", "endpoint"}), TotalDuration: metric.NewGauge(namespace, subsystem, name, "total_duration", - "Total Duration", []string{"name", "status"}), + "Total Duration", []string{"name", "status", "endpoint"}), } } diff --git a/probe/ping/metrics.go b/probe/ping/metrics.go index 887bd8e9..f5223455 100644 --- a/probe/ping/metrics.go +++ b/probe/ping/metrics.go @@ -38,18 +38,18 @@ func newMetrics(subsystem, name string) *metrics { namespace := global.GetEaseProbe().Name return &metrics{ PacketsSent: metric.NewCounter(namespace, subsystem, name, "sent", - "Total Package Sent", []string{"name"}), + "Total Package Sent", []string{"name", "endpoint"}), PacketsRecv: metric.NewCounter(namespace, subsystem, name, "recv", - "Total Package Received", []string{"name"}), + "Total Package Received", []string{"name", "endpoint"}), PacketLoss: metric.NewGauge(namespace, subsystem, name, "loss", - "Package Loss Percentage", []string{"name"}), + "Package Loss Percentage", []string{"name", "endpoint"}), MinRtt: metric.NewGauge(namespace, subsystem, name, "min_rtt", - "Minimum Round Trip Time", []string{"name"}), + "Minimum Round Trip Time", []string{"name", "endpoint"}), MaxRtt: metric.NewGauge(namespace, subsystem, name, "max_rtt", - "Maximum Round Trip Time", []string{"name"}), + "Maximum Round Trip Time", []string{"name", "endpoint"}), AvgRtt: metric.NewGauge(namespace, subsystem, name, "avg_rtt", - "Average Round Trip Time", []string{"name"}), + "Average Round Trip Time", []string{"name", "endpoint"}), StdDevRtt: metric.NewGauge(namespace, subsystem, name, "stddev_rtt", - "Standard Deviation of Round Trip Time", []string{"name"}), + "Standard Deviation of Round Trip Time", []string{"name", "endpoint"}), } } diff --git a/probe/ping/ping.go b/probe/ping/ping.go index b8415b69..fe750837 100644 --- a/probe/ping/ping.go +++ b/probe/ping/ping.go @@ -127,30 +127,37 @@ func (p *Ping) DoProbe() (bool, string) { // ExportMetrics export Ping metrics func (p *Ping) ExportMetrics(stats *ping.Statistics) { p.metrics.PacketsSent.With(prometheus.Labels{ - "name": p.ProbeName, + "name": p.ProbeName, + "endpoint": p.ProbeResult.Endpoint, }).Add(float64(stats.PacketsSent)) p.metrics.PacketsRecv.With(prometheus.Labels{ - "name": p.ProbeName, + "name": p.ProbeName, + "endpoint": p.ProbeResult.Endpoint, }).Add(float64(stats.PacketsRecv)) p.metrics.PacketLoss.With(prometheus.Labels{ - "name": p.ProbeName, + "name": p.ProbeName, + "endpoint": p.ProbeResult.Endpoint, }).Set(stats.PacketLoss) p.metrics.MaxRtt.With(prometheus.Labels{ - "name": p.ProbeName, + "name": p.ProbeName, + "endpoint": p.ProbeResult.Endpoint, }).Set(float64(stats.MaxRtt.Milliseconds())) p.metrics.MinRtt.With(prometheus.Labels{ - "name": p.ProbeName, + "name": p.ProbeName, + "endpoint": p.ProbeResult.Endpoint, }).Set(float64(stats.MinRtt.Milliseconds())) p.metrics.AvgRtt.With(prometheus.Labels{ - "name": p.ProbeName, + "name": p.ProbeName, + "endpoint": p.ProbeResult.Endpoint, }).Set(float64(stats.AvgRtt.Milliseconds())) p.metrics.StdDevRtt.With(prometheus.Labels{ - "name": p.ProbeName, + "name": p.ProbeName, + "endpoint": p.ProbeResult.Endpoint, }).Set(float64(stats.StdDevRtt.Milliseconds())) } diff --git a/probe/shell/metrics.go b/probe/shell/metrics.go index aaeb3181..ec27bf50 100644 --- a/probe/shell/metrics.go +++ b/probe/shell/metrics.go @@ -34,8 +34,8 @@ func newMetrics(subsystem, name string) *metrics { namespace := global.GetEaseProbe().Name return &metrics{ ExitCode: metric.NewCounter(namespace, subsystem, name, "exit_code", - "Exit Code", []string{"name", "exit"}), + "Exit Code", []string{"name", "exit", "endpoint"}), OutputLen: metric.NewGauge(namespace, subsystem, name, "output_len", - "Output Length", []string{"name", "exit"}), + "Output Length", []string{"name", "exit", "endpoint"}), } } diff --git a/probe/shell/shell.go b/probe/shell/shell.go index 527b8d34..bb6b1f42 100644 --- a/probe/shell/shell.go +++ b/probe/shell/shell.go @@ -116,12 +116,14 @@ func (s *Shell) DoProbe() (bool, string) { // ExportMetrics export shell metrics func (s *Shell) ExportMetrics() { s.metrics.ExitCode.With(prometheus.Labels{ - "name": s.ProbeName, - "exit": fmt.Sprintf("%d", s.exitCode), + "name": s.ProbeName, + "exit": fmt.Sprintf("%d", s.exitCode), + "endpoint": s.ProbeResult.Endpoint, }).Inc() s.metrics.OutputLen.With(prometheus.Labels{ - "name": s.ProbeName, - "exit": fmt.Sprintf("%d", s.exitCode), + "name": s.ProbeName, + "exit": fmt.Sprintf("%d", s.exitCode), + "endpoint": s.ProbeResult.Endpoint, }).Set(float64(s.outputLen)) } diff --git a/probe/ssh/metrics.go b/probe/ssh/metrics.go index 0635b6cf..6664f976 100644 --- a/probe/ssh/metrics.go +++ b/probe/ssh/metrics.go @@ -34,8 +34,8 @@ func newMetrics(subsystem, name string) *metrics { namespace := global.GetEaseProbe().Name return &metrics{ ExitCode: metric.NewCounter(namespace, subsystem, name, "exit_code", - "Exit Code", []string{"name", "exit"}), + "Exit Code", []string{"name", "exit", "endpoint"}), OutputLen: metric.NewGauge(namespace, subsystem, name, "output_len", - "Output Length", []string{"name", "exit"}), + "Output Length", []string{"name", "exit", "endpoint"}), } } diff --git a/probe/ssh/ssh.go b/probe/ssh/ssh.go index 3dc1e1e7..6c22bb63 100644 --- a/probe/ssh/ssh.go +++ b/probe/ssh/ssh.go @@ -268,12 +268,14 @@ func (s *Server) RunSSHCmd() (string, error) { // ExportMetrics export shell metrics func (s *Server) ExportMetrics() { s.metrics.ExitCode.With(prometheus.Labels{ - "name": s.ProbeName, - "exit": fmt.Sprintf("%d", s.exitCode), + "name": s.ProbeName, + "exit": fmt.Sprintf("%d", s.exitCode), + "endpoint": s.ProbeResult.Endpoint, }).Inc() s.metrics.OutputLen.With(prometheus.Labels{ - "name": s.ProbeName, - "exit": fmt.Sprintf("%d", s.exitCode), + "name": s.ProbeName, + "exit": fmt.Sprintf("%d", s.exitCode), + "endpoint": s.ProbeResult.Endpoint, }).Set(float64(s.outputLen)) } diff --git a/probe/tls/metrics.go b/probe/tls/metrics.go index d980a22c..0156c484 100644 --- a/probe/tls/metrics.go +++ b/probe/tls/metrics.go @@ -37,9 +37,9 @@ func newMetrics(subsystem, name string) *metrics { namespace := global.GetEaseProbe().Name return &metrics{ EarliestCertExpiry: metric.NewGauge(namespace, subsystem, name, "earliest_cert_expiry", - "last TLS chain expiry in timestamp seconds", []string{}), + "last TLS chain expiry in timestamp seconds", []string{"endpoint"}), LastChainExpiryTimestampSeconds: metric.NewGauge(namespace, subsystem, name, "last_chain_expiry_timestamp_seconds", - "earliest TLS cert expiry in unix time", []string{}), + "earliest TLS cert expiry in unix time", []string{"endpoint"}), } } diff --git a/probe/tls/tls.go b/probe/tls/tls.go index 1f2d3d7e..fb1eaf08 100644 --- a/probe/tls/tls.go +++ b/probe/tls/tls.go @@ -137,8 +137,12 @@ func (t *TLS) DoProbe() (bool, string) { state := tconn.ConnectionState() - t.metrics.EarliestCertExpiry.With(prometheus.Labels{}).Set(float64(getEarliestCertExpiry(&state).Unix())) - t.metrics.LastChainExpiryTimestampSeconds.With(prometheus.Labels{}).Set(float64(getLastChainExpiry(&state).Unix())) + t.metrics.EarliestCertExpiry.With(prometheus.Labels{ + "endpoint": t.ProbeResult.Endpoint, + }).Set(float64(getEarliestCertExpiry(&state).Unix())) + t.metrics.LastChainExpiryTimestampSeconds.With(prometheus.Labels{ + "endpoint": t.ProbeResult.Endpoint, + }).Set(float64(getLastChainExpiry(&state).Unix())) return true, "TLS Endpoint Verified Successfully!" }