diff --git a/services/horizon/CHANGELOG.md b/services/horizon/CHANGELOG.md
index 8ff6e84d49..bd173723cb 100644
--- a/services/horizon/CHANGELOG.md
+++ b/services/horizon/CHANGELOG.md
@@ -12,6 +12,7 @@ This is a minor release with no DB Schema migrations nor explicit state rebuild.
 * Use the correct asset when calculating liquidity pool disbursements ([4018](https://github.com/stellar/go/pull/4018))
 * Make sure Stellar-Core is not started before previous instance termination ([4020](https://github.com/stellar/go/pull/4020))
 * Add a new feature flag `--ingest-enable-extended-log-ledger-stats` (`false` by default) that enables extra ledger stats when logging ledger processing info ([4017](https://github.com/stellar/go/pull/4017))
+* Add a new command `horizon record-metrics` that records `:[ADMIN_PORT]/metrics` into a zip file for debugging purposes ([4023](https://github.com/stellar/go/pull/4023))
 * Expose the `Latest-Ledger` header to browser web pages ([3995](https://github.com/stellar/go/pull/3995))
 * Correct `horizon db reingest range` output command name when invoking `horizon db detect-gaps` ([4007](https://github.com/stellar/go/pull/4007))
 * Add new prometheus metrics:
diff --git a/services/horizon/cmd/record_metrics.go b/services/horizon/cmd/record_metrics.go
new file mode 100644
index 0000000000..a69ae379c8
--- /dev/null
+++ b/services/horizon/cmd/record_metrics.go
@@ -0,0 +1,120 @@
+package cmd
+
+import (
+	"archive/zip"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"time"
+
+	"github.com/spf13/cobra"
+	horizon "github.com/stellar/go/services/horizon/internal"
+	"github.com/stellar/go/support/errors"
+	"github.com/stellar/go/support/log"
+)
+
+// recordMetricsCmd scrapes `/metrics` from the admin server on 127.0.0.1 at a
+// fixed interval and stores every scrape as a separate entry of a zip archive
+// created in the current working directory.
+var recordMetricsCmd = &cobra.Command{
+	Use:   "record-metrics",
+	Short: "records `/metrics` on admin port for debugging purposes",
+	Long:  "",
+	RunE: func(cmd *cobra.Command, args []string) error {
+		if err := horizon.ApplyFlags(config, flags, horizon.ApplyOptions{}); err != nil {
+			return err
+		}
+
+		const (
+			timeFormat            = "2006-01-02-15-04-05"
+			scrapeIntervalSeconds = 15
+			scrapesCount          = (60 / scrapeIntervalSeconds) * 10 // remember about rounding if change is required
+		)
+
+		client := &http.Client{
+			Timeout: 2 * time.Second,
+		}
+		metricsURL := fmt.Sprintf("http://127.0.0.1:%d/metrics", config.AdminPort)
+
+		outputFileName := fmt.Sprintf("./metrics-%s.zip", time.Now().Format(timeFormat))
+		outputFile, err := os.Create(outputFileName)
+		if err != nil {
+			return err
+		}
+
+		w := zip.NewWriter(outputFile)
+		// finish closes the zip writer (which writes the central directory)
+		// and then the output file, returning the first error encountered.
+		// Both are always attempted so the file handle is never leaked.
+		finish := func() error {
+			zipErr := w.Close()
+			fileErr := outputFile.Close()
+			if zipErr != nil {
+				return zipErr
+			}
+			return fileErr
+		}
+
+		for i := 1; i <= scrapesCount; i++ {
+			log.Infof(
+				"Getting metrics %d/%d... ETA: %s",
+				i,
+				scrapesCount,
+				time.Duration(scrapeIntervalSeconds*(scrapesCount-i))*time.Second,
+			)
+
+			if err = recordMetricsScrape(client, metricsURL, w, timeFormat); err != nil {
+				// Best effort: still finalize the archive so the scrapes
+				// recorded so far are kept. The scrape error is the one worth
+				// reporting, so a failure here is deliberately ignored.
+				_ = finish()
+				return err
+			}
+
+			if i < scrapesCount {
+				time.Sleep(scrapeIntervalSeconds * time.Second)
+			}
+		}
+
+		if err = finish(); err != nil {
+			return errors.Wrap(err, "Error finalizing metrics archive")
+		}
+
+		log.Infof("Metrics recorded to %s!", outputFileName)
+		return nil
+	},
+}
+
+// recordMetricsScrape fetches url with client and streams the response body
+// into a new entry of the zip archive w. The entry is named after the current
+// time formatted with timeFormat.
+func recordMetricsScrape(client *http.Client, url string, w *zip.Writer, timeFormat string) error {
+	metricsResponse, err := client.Get(url)
+	if err != nil {
+		return errors.Wrap(err, "Error fetching metrics. Is admin server running?")
+	}
+	// Close the body on every return path so each scrape releases its
+	// response instead of leaving it open until the command exits.
+	defer metricsResponse.Body.Close()
+
+	if metricsResponse.StatusCode != http.StatusOK {
+		return errors.Errorf("Invalid status code: %d. Is admin server running?", metricsResponse.StatusCode)
+	}
+
+	metricsFile, err := w.Create(time.Now().Format(timeFormat))
+	if err != nil {
+		return err
+	}
+
+	if _, err = io.Copy(metricsFile, metricsResponse.Body); err != nil {
+		return errors.Wrap(err, "Error reading response body. Is admin server running?")
+	}
+
+	// Flush to keep memory usage low and save at least some records in case of errors later.
+	return w.Flush()
+}
+
+func init() {
+	RootCmd.AddCommand(recordMetricsCmd)
+}