Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

calibrate: refactor metrics error #44451

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions executor/calibrate_resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,11 @@ func (e *calibrateResourceExec) Next(ctx context.Context, req *chunk.Chunk) erro
return e.staticCalibrate(ctx, req, exec)
}

var (
errLowUsage = errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead")
errNoCPUQuotaMetrics = errors.Normalize("There is no CPU quota metrics, %v")
)

func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk.Chunk, exec sqlexec.RestrictedSQLExecutor) error {
startTs, endTs, err := e.parseCalibrateDuration(ctx)
if err != nil {
Expand All @@ -206,11 +211,11 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk

totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
rus, err := getRUPerSec(ctx, e.ctx, exec, startTime, endTime)
if err != nil {
Expand Down Expand Up @@ -256,10 +261,10 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk
tikvCPUs.next()
}
if len(quotas) < 5 {
return errors.Errorf("There are too few metrics points available in selected time window")
return errLowUsage
}
if float64(len(quotas))/float64(len(quotas)+lowCount) <= percentOfPass {
return errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead")
return errLowUsage
}
sort.Slice(quotas, func(i, j int) bool {
return quotas[i] > quotas[j]
Expand All @@ -286,11 +291,11 @@ func (e *calibrateResourceExec) staticCalibrate(ctx context.Context, req *chunk.

totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}

// The default workload to calculate the RU capacity.
Expand Down Expand Up @@ -390,9 +395,6 @@ func getValuesFromMetrics(ctx context.Context, sctx sessionctx.Context, exec sql
if err != nil {
return nil, errors.Trace(err)
}
if len(rows) == 0 {
return nil, errors.Errorf("metrics '%s' is empty", metrics)
}
ret := make([]*timePointValue, 0, len(rows))
for _, row := range rows {
if tp, err := row.GetTime(0).AdjustedGoTime(sctx.GetSessionVars().Location()); err == nil {
Expand Down
47 changes: 30 additions & 17 deletions executor/calibrate_resource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,24 +95,30 @@ func TestCalibrateResource(t *testing.T) {
return time
}

mockData := map[string][][]types.Datum{
"tikv_cpu_quota": {
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0),
},
"tidb_server_maxprocs": {
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0),
},
}
mockData := make(map[string][][]types.Datum)
ctx := context.WithValue(context.Background(), "__mockMetricsTableData", mockData)
ctx = failpoint.WithHook(ctx, func(_ context.Context, fpname string) bool {
return fpName == fpname
})
rs, err = tk.Exec("CALIBRATE RESOURCE")
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
// because when mock metrics is empty, error is always `pd unavailable`, don't check detail.
require.ErrorContains(t, err, "There is no CPU quota metrics, query metric error: pd unavailable")

mockData["tikv_cpu_quota"] = [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0),
}
mockData["tidb_server_maxprocs"] = [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0),
}
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE").Check(testkit.Rows("69768"))
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD TPCC").Check(testkit.Rows("69768"))
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD OLTP_READ_WRITE").Check(testkit.Rows("55823"))
Expand Down Expand Up @@ -402,7 +408,7 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "The workload in selected time window is too low")

ru3 := [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0),
Expand Down Expand Up @@ -442,7 +448,7 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "The workload in selected time window is too low")

// flash back to init data.
mockData["resource_manager_resource_unit"] = ru1
Expand Down Expand Up @@ -553,7 +559,14 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "The workload in selected time window is too low")

delete(mockData, "process_cpu_usage")
rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' END_TIME '2020-02-12 10:45:00'")
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "query metric error: pd unavailable")
}

type mockResourceGroupProvider struct {
Expand Down