Skip to content

Commit

Permalink
bugtool: Dump envoy metrics for troubleshooting
Browse files Browse the repository at this point in the history
[ upstream commit 0307add ]

Users might not have the Prometheus metrics endpoint enabled as part of
their existing Cilium installation. This commit adds the capability to
dump Envoy metrics without needing to re-install with an additional
Helm flag or to update the existing Cilium config map. One common use case
is to check whether there are any connectivity issues (e.g. 503, timeout,
etc.) for egress traffic.

For example, the metrics below are part of the dump; these two metrics
clearly signal a configuration issue with TLS egress.

```bash
envoy_cluster_upstream_rq{envoy_response_code="503",envoy_cluster_name="egress-cluster-tls"} 100
envoy_cluster_upstream_cx_connect_fail{envoy_cluster_name="egress-cluster-tls"} 300
```

Testing was done locally by manually running the curl command in a pod:

```bash
$ kubectl exec -n kube-system ds/cilium -- curl --unix-socket /var/run/cilium/envoy-admin.sock http:/admin/stats/prometheus > metrics_dump.txt
$ cat metrics_dump.txt | wc -l
  2753
```

Signed-off-by: Tam Mach <[email protected]>
Signed-off-by: Paul Chaignon <[email protected]>
  • Loading branch information
sayboras authored and pchaigno committed Feb 15, 2023
1 parent cf79acc commit 1aafafe
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-bugtool.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 11 additions & 3 deletions bugtool/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ var (
archivePrefix string
getPProf bool
envoyDump bool
envoyMetrics bool
pprofPort int
traceSeconds int
parallelWorkers int
Expand All @@ -81,6 +82,7 @@ func init() {
BugtoolRootCmd.Flags().BoolVar(&archive, "archive", true, "Create archive when false skips deletion of the output directory")
BugtoolRootCmd.Flags().BoolVar(&getPProf, "get-pprof", false, "When set, only gets the pprof traces from the cilium-agent binary")
BugtoolRootCmd.Flags().BoolVar(&envoyDump, "envoy-dump", true, "When set, dump envoy configuration from unix socket")
BugtoolRootCmd.Flags().BoolVar(&envoyMetrics, "envoy-metrics", true, "When set, dump envoy prometheus metrics from unix socket")
BugtoolRootCmd.Flags().IntVar(&pprofPort,
"pprof-port", defaults.PprofPortAgent,
fmt.Sprintf(
Expand Down Expand Up @@ -208,11 +210,17 @@ func runTool() {
}
} else {
if envoyDump {
if err := dumpEnvoy(cmdDir); err != nil {
if err := dumpEnvoy(cmdDir, "http://admin/config_dump?include_eds", "envoy-config.json"); err != nil {
fmt.Fprintf(os.Stderr, "Unable to dump envoy config: %s\n", err)
}
}

if envoyMetrics {
if err := dumpEnvoy(cmdDir, "http://admin/stats/prometheus", "envoy-metrics.txt"); err != nil {
fmt.Fprintf(os.Stderr, "Unable to retrieve envoy prometheus metrics: %s\n", err)
}
}

// Check if there is a user supplied configuration
if config, _ := loadConfigFile(configPath); config != nil {
// All of of the commands run are from the configuration file
Expand Down Expand Up @@ -490,7 +498,7 @@ func getCiliumPods(namespace, label string) ([]string, error) {
return ciliumPods, nil
}

func dumpEnvoy(rootDir string) error {
func dumpEnvoy(rootDir string, resource string, fileName string) error {
// curl --unix-socket /var/run/cilium/envoy-admin.sock http:/admin/config_dump\?include_eds > dump.json
c := &http.Client{
Transport: &http.Transport{
Expand All @@ -499,7 +507,7 @@ func dumpEnvoy(rootDir string) error {
},
},
}
return downloadToFile(c, "http://admin/config_dump?include_eds", filepath.Join(rootDir, "envoy-config.json"))
return downloadToFile(c, resource, filepath.Join(rootDir, fileName))
}

func pprofTraces(rootDir string) error {
Expand Down

0 comments on commit 1aafafe

Please sign in to comment.