Skip to content

Commit

Permalink
feat(cluster/spec): check alertmanager,prometheus's config (#1074)
Browse files Browse the repository at this point in the history
* feat(cluster/spec): check alertmanager,prometheus's config

* fix(cluster/spec): convert to ctxt.Executor

* fix(cluster/spec): use find cp instead of cp *.json (zsh compatible)

* enhance(cluster/spec): use find cp instead of cp for dm's grafana,prometheus

* fix(cluster/spec): index error

* feat(cluster/spec): add prometheus testcase

* feat(*): rm not used dm/prometheus.yml.tpl

* feat(cluster/spec): test Prometheus localDir

* fix(template/prometheus): if local rules are given, system rules won't be automatically copied

* feat(cluster): make pkger

* feat(tests/cluster): ensure tidb exists in prometheus.yml
  • Loading branch information
jsvisa authored Jan 29, 2021
1 parent 74dfe6f commit 859e5fc
Show file tree
Hide file tree
Showing 10 changed files with 182 additions and 78 deletions.
3 changes: 1 addition & 2 deletions pkg/cluster/embed/autogen_pkger.go

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion pkg/cluster/spec/alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,10 @@ func (i *AlertManagerInstance) InitConfig(
if err := config.NewAlertManagerConfig().ConfigToFile(configPath); err != nil {
return err
}
return i.TransferLocalConfigFile(ctx, e, configPath, dst)
if err := i.TransferLocalConfigFile(ctx, e, configPath, dst); err != nil {
return err
}
return checkConfig(ctx, e, i.ComponentName(), clusterVersion, i.OS(), i.Arch(), i.ComponentName()+".yml", paths, nil)
}

// ScaleConfig deploy temporary config on scaling
Expand Down
4 changes: 2 additions & 2 deletions pkg/cluster/spec/grafana.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ func (i *GrafanaInstance) initDashboards(ctx context.Context, e ctxt.Executor, s
cmds := []string{
"mkdir -p %[1]s",
`find %[1]s -maxdepth 1 -type f -name "*.json" -delete`,
"cp %[2]s/bin/*.json %[1]s",
`find %[2]s/bin -maxdepth 1 -type f -name "*.json" -exec cp {} %[1]s \;`,
}
_, stderr, err := e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), dashboardsDir, paths.Deploy), false)
if err != nil {
Expand Down Expand Up @@ -275,7 +275,7 @@ func (i *GrafanaInstance) installDashboards(ctx context.Context, e ctxt.Executor
cmds := []string{
"mkdir -p %[1]s",
`find %[1]s -maxdepth 1 -type f -name "*.json" -delete`,
"cp %[2]s/dm-master/scripts/*.json %[1]s",
`find %[2]s/dm-master/scripts -type f -name "*.json" -exec cp {} %[1]s \;`,
"rm -rf %[2]s",
}
_, stderr, err = e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), targetDir, tmp), false)
Expand Down
10 changes: 7 additions & 3 deletions pkg/cluster/spec/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,11 @@ func (i *MonitorInstance) InitConfig(
return err
}
dst = filepath.Join(paths.Deploy, "conf", "prometheus.yml")
return e.Transfer(ctx, fp, dst, false)
if err := e.Transfer(ctx, fp, dst, false); err != nil {
return err
}

return checkConfig(ctx, e, i.ComponentName(), clusterVersion, i.OS(), i.Arch(), i.ComponentName()+".yml", paths, nil)
}

// We only really installRules for dm cluster because the rules(*.rules.yml) packed with the prometheus
Expand Down Expand Up @@ -314,7 +318,7 @@ func (i *MonitorInstance) installRules(ctx context.Context, e ctxt.Executor, dep
cmds := []string{
	"mkdir -p %[1]s",
	`find %[1]s -type f -name "*.rules.yml" -delete`,
	"cp %[2]s/dm-master/conf/*.rules.yml %[1]s",
	// find replaces {} with each matched path. Without the placeholder cp is
	// invoked with the destination as its only operand and copies nothing.
	`find %[2]s/dm-master/conf -type f -name "*.rules.yml" -exec cp {} %[1]s \;`,
	"rm -rf %[2]s",
}
_, stderr, err = e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), targetDir, tmp), false)
Expand All @@ -336,7 +340,7 @@ func (i *MonitorInstance) initRules(ctx context.Context, e ctxt.Executor, spec P
cmds := []string{
"mkdir -p %[1]s/conf",
`find %[1]s/conf -type f -name "*.rules.yml" -delete`,
`cp %[1]s/bin/prometheus/*.rules.yml %[1]s/conf/`,
`find %[1]s/bin/prometheus -maxdepth 1 -type f -name "*.rules.yml" -exec cp {} %[1]s/conf/ \;`,
}
_, stderr, err := e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), paths.Deploy), false)
if err != nil {
Expand Down
112 changes: 112 additions & 0 deletions pkg/cluster/spec/prometheus_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package spec

import (
"context"
"io/ioutil"
"os"
"os/user"
"path"
"path/filepath"
"testing"

"github.com/pingcap/tiup/pkg/checkpoint"
"github.com/pingcap/tiup/pkg/cluster/executor"
"github.com/pingcap/tiup/pkg/meta"
"github.com/stretchr/testify/assert"
)

// TestLocalRuleDirs exercises initRules for a Prometheus instance whose spec
// sets RuleDir. After the call, every file found in the local rule directory
// must exist under <deploy>/conf, and the rule file seeded under
// <deploy>/bin/prometheus must not.
func TestLocalRuleDirs(t *testing.T) {
	deployDir, err := ioutil.TempDir("", "tiup-*")
	if !assert.NoError(t, err) {
		return
	}
	defer os.RemoveAll(deployDir)

	if !assert.NoError(t, os.MkdirAll(path.Join(deployDir, "bin/prometheus"), 0755)) {
		return
	}
	localDir, err := filepath.Abs("./testdata/rules")
	if !assert.NoError(t, err) {
		return
	}

	// Seed a rule file under bin/prometheus so the test can tell whether
	// initRules copied it into conf/.
	err = ioutil.WriteFile(path.Join(deployDir, "bin/prometheus", "dummy.rules.yml"), []byte("dummy"), 0644)
	if !assert.NoError(t, err) {
		return
	}

	topo := new(Specification)
	topo.Monitors = append(topo.Monitors, PrometheusSpec{
		Host:    "127.0.0.1",
		Port:    9090,
		RuleDir: localDir,
	})

	comp := MonitorComponent{topo}
	ints := comp.Instances()

	// Stop before indexing: a failed length check must not turn into a panic.
	if !assert.Len(t, ints, 1) {
		return
	}
	promInstance := ints[0].(*MonitorInstance)

	// Named currentUser so the imported os/user package is not shadowed.
	currentUser, err := user.Current()
	if !assert.NoError(t, err) {
		return
	}
	e, err := executor.New(executor.SSHTypeNone, false, executor.SSHConfig{Host: "127.0.0.1", User: currentUser.Username})
	if !assert.NoError(t, err) {
		return
	}

	ctx := checkpoint.NewContext(context.Background())
	err = promInstance.initRules(ctx, e, promInstance.InstanceSpec.(PrometheusSpec), meta.DirPaths{Deploy: deployDir})
	assert.NoError(t, err)

	assert.NoFileExists(t, path.Join(deployDir, "conf", "dummy.rules.yml"))
	fs, err := ioutil.ReadDir(localDir)
	assert.NoError(t, err)
	for _, f := range fs {
		assert.FileExists(t, path.Join(deployDir, "conf", f.Name()))
	}
}

// TestNoLocalRuleDirs exercises initRules for a Prometheus instance whose spec
// leaves RuleDir unset. After the call, the rule file seeded under
// <deploy>/bin/prometheus must exist under <deploy>/conf, and none of the files
// from ./testdata/rules may appear there.
func TestNoLocalRuleDirs(t *testing.T) {
	deployDir, err := ioutil.TempDir("", "tiup-*")
	if !assert.NoError(t, err) {
		return
	}
	defer os.RemoveAll(deployDir)

	if !assert.NoError(t, os.MkdirAll(path.Join(deployDir, "bin/prometheus"), 0755)) {
		return
	}
	// localDir is only read afterwards, to list file names that must be absent.
	localDir, err := filepath.Abs("./testdata/rules")
	if !assert.NoError(t, err) {
		return
	}

	// Seed a rule file under bin/prometheus; it is expected to end up in conf/.
	err = ioutil.WriteFile(path.Join(deployDir, "bin/prometheus", "dummy.rules.yml"), []byte("dummy"), 0644)
	if !assert.NoError(t, err) {
		return
	}

	topo := new(Specification)
	topo.Monitors = append(topo.Monitors, PrometheusSpec{
		Host: "127.0.0.1",
		Port: 9090,
	})

	comp := MonitorComponent{topo}
	ints := comp.Instances()

	// Stop before indexing: a failed length check must not turn into a panic.
	if !assert.Len(t, ints, 1) {
		return
	}
	promInstance := ints[0].(*MonitorInstance)

	// Named currentUser so the imported os/user package is not shadowed.
	currentUser, err := user.Current()
	if !assert.NoError(t, err) {
		return
	}
	e, err := executor.New(executor.SSHTypeNone, false, executor.SSHConfig{Host: "127.0.0.1", User: currentUser.Username})
	if !assert.NoError(t, err) {
		return
	}

	ctx := checkpoint.NewContext(context.Background())
	err = promInstance.initRules(ctx, e, promInstance.InstanceSpec.(PrometheusSpec), meta.DirPaths{Deploy: deployDir})
	assert.NoError(t, err)

	assert.FileExists(t, path.Join(deployDir, "conf", "dummy.rules.yml"))
	fs, err := ioutil.ReadDir(localDir)
	assert.NoError(t, err)
	for _, f := range fs {
		assert.NoFileExists(t, path.Join(deployDir, "conf", f.Name()))
	}
}
55 changes: 32 additions & 23 deletions pkg/cluster/spec/server_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,35 +236,44 @@ func mergeImported(importConfig []byte, specConfigs ...map[string]interface{}) (
type BindVersion func(comp string, version string) (bindVersion string)

func checkConfig(ctx context.Context, e ctxt.Executor, componentName, clusterVersion, nodeOS, arch, config string, paths meta.DirPaths, bindVersion BindVersion) error {
repo, err := clusterutil.NewRepository(nodeOS, arch)
if err != nil {
return perrs.Annotate(ErrorCheckConfig, err.Error())
}
var cmd string
configPath := path.Join(paths.Deploy, "conf", config)
switch componentName {
case ComponentPrometheus:
cmd = fmt.Sprintf("%s/bin/prometheus/promtool check config %s", paths.Deploy, configPath)
case ComponentAlertmanager:
cmd = fmt.Sprintf("%s/bin/alertmanager/amtool check-config %s", paths.Deploy, configPath)
default:
repo, err := clusterutil.NewRepository(nodeOS, arch)
if err != nil {
return perrs.Annotate(ErrorCheckConfig, err.Error())
}

ver := clusterVersion
if bindVersion != nil {
ver = bindVersion(componentName, clusterVersion)
}
ver := clusterVersion
if bindVersion != nil {
ver = bindVersion(componentName, clusterVersion)
}

entry, err := repo.ComponentBinEntry(componentName, ver)
if err != nil {
return perrs.Annotate(ErrorCheckConfig, err.Error())
}
entry, err := repo.ComponentBinEntry(componentName, ver)
if err != nil {
return perrs.Annotate(ErrorCheckConfig, err.Error())
}
binPath := path.Join(paths.Deploy, "bin", entry)

binPath := path.Join(paths.Deploy, "bin", entry)
// Skip old versions
if !hasConfigCheckFlag(ctx, e, binPath) {
return nil
}
// Skip old versions
if !hasConfigCheckFlag(ctx, e, binPath) {
return nil
}

// Hack tikv --pd flag
extra := ""
if componentName == ComponentTiKV {
extra = `--pd=""`
// Hack tikv --pd flag
extra := ""
if componentName == ComponentTiKV {
extra = `--pd=""`
}
cmd = fmt.Sprintf("%s --config-check --config=%s %s", binPath, configPath, extra)
}

configPath := path.Join(paths.Deploy, "conf", config)
_, _, err = e.Execute(ctx, fmt.Sprintf("%s --config-check --config=%s %s", binPath, configPath, extra), false)
_, _, err := e.Execute(ctx, cmd, false)
if err != nil {
return perrs.Annotate(ErrorCheckConfig, err.Error())
}
Expand Down
15 changes: 15 additions & 0 deletions pkg/cluster/spec/testdata/rules/tidb.rules.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# magic-string-for-test
groups:
- name: alert.rules
rules:
- alert: TiDB_schema_error
expr: increase(tidb_session_schema_lease_error_total{type="outdated"}[15m]) > 0
for: 1m
labels:
env: ENV_LABELS_ENV
level: emergency
expr: increase(tidb_session_schema_lease_error_total{type="outdated"}[15m]) > 0
annotations:
description: "cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}"
value: "{{ $value }}"
summary: TiDB schema error
44 changes: 0 additions & 44 deletions templates/config/dm/prometheus.yml.tpl

This file was deleted.

9 changes: 6 additions & 3 deletions templates/config/prometheus.yml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@ global:

# Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files:
{{- if .LocalRules}}
{{- range .LocalRules}}
- '{{.}}'
{{- end}}
{{- else}}
{{- if .MonitoredServers}}
- 'node.rules.yml'
- 'blacker.rules.yml'
- 'bypass.rules.yml'
{{- end}}
{{- range .LocalRules}}
- '{{.}}'
{{- end}}
{{- if .PDAddrs}}
- 'pd.rules.yml'
{{- end}}
Expand Down Expand Up @@ -48,6 +50,7 @@ rule_files:
{{- if .DMMasterAddrs}}
- 'dm_master.rules.yml'
{{- end}}
{{- end}}

{{- if .AlertmanagerAddrs}}
alerting:
Expand Down
3 changes: 3 additions & 0 deletions tests/tiup-cluster/script/cmd_subtest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ function cmd_subtest() {
tiup-cluster $client --yes deploy $name $version $topo -i ~/.ssh/id_rsa --skip-create-user

# check the local config
tiup-cluster $client exec $name -N n1 --command "grep tidb.rules.yml /home/tidb/deploy/prometheus-9090/conf/prometheus.yml"
! tiup-cluster $client exec $name -N n1 --command "grep node.rules.yml /home/tidb/deploy/prometheus-9090/conf/prometheus.yml"
tiup-cluster $client exec $name -N n1 --command "grep magic-string-for-test /home/tidb/deploy/prometheus-9090/conf/tidb.rules.yml"
tiup-cluster $client exec $name -N n1 --command "grep magic-string-for-test /home/tidb/deploy/grafana-3000/dashboards/tidb.json"
tiup-cluster $client exec $name -N n1 --command "grep magic-string-for-test /home/tidb/deploy/alertmanager-9093/conf/alertmanager.yml"
Expand Down Expand Up @@ -124,6 +126,7 @@ function cmd_subtest() {
tiup-cluster $client push $name test_transfer_1.txt "{{ .DeployDir }}/test_transfer.txt" -R grafana
tiup-cluster $client pull $name "{{ .DeployDir }}/test_transfer.txt" test_transfer_2.txt -R grafana
diff test_transfer_1.txt test_transfer_2.txt
rm -f test_transfer_{1,2}.txt

echo "checking cleanup data and log"
tiup-cluster $client exec $name -N n1 --command "ls /home/tidb/deploy/prometheus-9090/log/prometheus.log"
Expand Down

0 comments on commit 859e5fc

Please sign in to comment.