diff --git a/pkg/cluster/spec/monitoring.go b/pkg/cluster/spec/monitoring.go index 38d77430f4..58f50721a1 100644 --- a/pkg/cluster/spec/monitoring.go +++ b/pkg/cluster/spec/monitoring.go @@ -17,6 +17,7 @@ import ( "context" "crypto/tls" "fmt" + "os" "path" "path/filepath" "reflect" @@ -29,6 +30,7 @@ import ( "github.com/pingcap/tiup/pkg/cluster/template/scripts" "github.com/pingcap/tiup/pkg/meta" "github.com/pingcap/tiup/pkg/set" + "gopkg.in/yaml.v3" ) // PrometheusSpec represents the Prometheus Server topology specification in topology.yaml @@ -51,6 +53,7 @@ type PrometheusSpec struct { Arch string `yaml:"arch,omitempty"` OS string `yaml:"os,omitempty"` RuleDir string `yaml:"rule_dir,omitempty" validate:"rule_dir:editable"` + AdditionalScrapeConf map[string]interface{} `yaml:"additional_scrape_conf,omitempty" validate:"additional_scrape_conf:ignore"` } // Remote prometheus remote config @@ -336,6 +339,12 @@ func (i *MonitorInstance) InitConfig( if err := cfig.ConfigToFile(fp); err != nil { return err } + if spec.AdditionalScrapeConf != nil { + err = mergeAdditionalScrapeConf(fp, spec.AdditionalScrapeConf) + if err != nil { + return err + } + } dst = filepath.Join(paths.Deploy, "conf", "prometheus.yml") if err := e.Transfer(ctx, fp, dst, false, 0, false); err != nil { return err @@ -428,3 +437,26 @@ func (i *MonitorInstance) ScaleConfig( i.topo = topo return i.InitConfig(ctx, e, clusterName, clusterVersion, deployUser, paths) } + +func mergeAdditionalScrapeConf(source string, addition map[string]interface{}) error { + var result map[string]interface{} + bytes, err := os.ReadFile(source) + if err != nil { + return err + } + err = yaml.Unmarshal(bytes, &result) + if err != nil { + return err + } + + for _, job := range result["scrape_configs"].([]interface{}) { + for k, v := range addition { + job.(map[string]interface{})[k] = v + } + } + bytes, err = yaml.Marshal(result) + if err != nil { + return err + } + return os.WriteFile(source, bytes, 0644) +} diff --git a/pkg/cluster/spec/monitoring_test.go b/pkg/cluster/spec/monitoring_test.go index ae3d0a07c1..a0a85dfb2a 100644 --- a/pkg/cluster/spec/monitoring_test.go +++ b/pkg/cluster/spec/monitoring_test.go @@ -15,6 +15,7 @@ package spec import ( "context" + "fmt" "os" "os/user" "path" @@ -25,6 +26,7 @@ import ( "github.com/pingcap/tiup/pkg/cluster/executor" "github.com/pingcap/tiup/pkg/meta" "github.com/stretchr/testify/assert" + "gopkg.in/yaml.v3" ) func TestLocalRuleDirs(t *testing.T) { @@ -128,3 +130,95 @@ groups: assert.NoFileExists(t, path.Join(deployDir, "conf", f.Name())) } } + +func TestMergeAdditionalScrapeConf(t *testing.T) { + file, err := os.CreateTemp("", "tiup-cluster-spec-test") + if err != nil { + panic(fmt.Sprintf("create temp file: %s", err)) + } + defer os.Remove(file.Name()) + + _, err = file.WriteString(`--- +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + evaluation_interval: 15s # By default, scrape targets every 15 seconds. + # scrape_timeout is set to the global default (10s). + external_labels: + cluster: 'test' + monitor: "prometheus" + +scrape_configs: + - job_name: "tidb" + honor_labels: true # don't overwrite job & instance labels + static_configs: + - targets: + - '192.168.122.215:10080' + - job_name: "tikv" + honor_labels: true # don't overwrite job & instance labels + static_configs: + - targets: + - '192.168.122.25:20180'`) + assert.Nil(t, err) + + expected := `global: + evaluation_interval: 15s + external_labels: + cluster: test + monitor: prometheus + scrape_interval: 15s +scrape_configs: + - honor_labels: true + job_name: tidb + metric_relabel_configs: + - action: drop + regex: tikv_thread_nonvoluntary_context_switches|tikv_thread_voluntary_context_switches|tikv_threads_io_bytes_total + separator: ; + source_labels: + - __name__ + - action: drop + regex: tikv_thread_cpu_seconds_total;(tokio|rocksdb).+ + separator: ; + source_labels: + - __name__ + - name + static_configs: + - targets: + - 192.168.122.215:10080 + - honor_labels: true + job_name: tikv + metric_relabel_configs: + - action: drop + regex: tikv_thread_nonvoluntary_context_switches|tikv_thread_voluntary_context_switches|tikv_threads_io_bytes_total + separator: ; + source_labels: + - __name__ + - action: drop + regex: tikv_thread_cpu_seconds_total;(tokio|rocksdb).+ + separator: ; + source_labels: + - __name__ + - name + static_configs: + - targets: + - 192.168.122.25:20180 +` + + var addition map[string]interface{} + err = yaml.Unmarshal([]byte(`metric_relabel_configs: + - source_labels: [__name__] + separator: ; + regex: tikv_thread_nonvoluntary_context_switches|tikv_thread_voluntary_context_switches|tikv_threads_io_bytes_total + action: drop + - source_labels: [__name__,name] + separator: ; + regex: tikv_thread_cpu_seconds_total;(tokio|rocksdb).+ + action: drop`), &addition) + assert.Nil(t, err) + + err = mergeAdditionalScrapeConf(file.Name(), addition) + assert.Nil(t, err) + result, err := os.ReadFile(file.Name()) + assert.Nil(t, err) + + assert.Equal(t, expected, string(result)) +}