From 3736109a24383ffa45504fc57b5bf8033c47cf21 Mon Sep 17 00:00:00 2001 From: Yongbo Jiang Date: Fri, 16 Jun 2023 17:25:10 +0800 Subject: [PATCH 1/4] This is an automated cherry-pick of #44451 Signed-off-by: ti-chi-bot --- executor/calibrate_resource.go | 24 ++- executor/calibrate_resource_test.go | 248 ++++++++++++++++++++++++++-- 2 files changed, 252 insertions(+), 20 deletions(-) diff --git a/executor/calibrate_resource.go b/executor/calibrate_resource.go index 55081c4e5d192..2bb353ceb9d92 100644 --- a/executor/calibrate_resource.go +++ b/executor/calibrate_resource.go @@ -195,6 +195,11 @@ func (e *calibrateResourceExec) Next(ctx context.Context, req *chunk.Chunk) erro return e.staticCalibrate(ctx, req, exec) } +var ( + errLowUsage = errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead") + errNoCPUQuotaMetrics = errors.Normalize("There is no CPU quota metrics, %v") +) + func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk.Chunk, exec sqlexec.RestrictedSQLExecutor) error { startTs, endTs, err := e.parseCalibrateDuration() if err != nil { @@ -205,11 +210,11 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec) if err != nil { - return err + return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) } totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec) if err != nil { - return err + return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) } rus, err := getRUPerSec(ctx, exec, startTime, endTime) if err != nil { @@ -241,8 +246,9 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk } } if len(quotas) < 5 { - return errors.Errorf("There are too few metrics points available in selected time window") + return errLowUsage } +<<<<<<< HEAD if float64(len(quotas))/float64(len(quotas)+lowCount) > percentOfPass { sort.Slice(quotas, func(i, j int) bool { return quotas[i] > quotas[j] @@ -257,6 +263,10 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk req.AppendUint64(0, uint64(quota)) } else { return errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead") +======= + if float64(len(quotas))/float64(len(quotas)+lowCount) <= percentOfPass { + return errLowUsage +>>>>>>> 841aed8d95a (calibrate: refactor metrics error (#44451)) } return nil } @@ -272,11 +282,11 @@ func (e *calibrateResourceExec) staticCalibrate(ctx context.Context, req *chunk. totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec) if err != nil { - return err + return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) } totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec) if err != nil { - return err + return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) } // The default workload to calculate the RU capacity. @@ -339,10 +349,14 @@ func getValuesFromMetrics(ctx context.Context, exec sqlexec.RestrictedSQLExecuto if err != nil { return nil, errors.Trace(err) } +<<<<<<< HEAD if len(rows) == 0 { return nil, errors.Errorf("metrics '%s' is empty", metrics) } ret := make([]float64, 0, len(rows)) +======= + ret := make([]*timePointValue, 0, len(rows)) +>>>>>>> 841aed8d95a (calibrate: refactor metrics error (#44451)) for _, row := range rows { ret = append(ret, row.GetFloat64(0)) } diff --git a/executor/calibrate_resource_test.go b/executor/calibrate_resource_test.go index 874eca8ddae38..c90cfa50820a2 100644 --- a/executor/calibrate_resource_test.go +++ b/executor/calibrate_resource_test.go @@ -95,24 +95,30 @@ func TestCalibrateResource(t *testing.T) { return time } - mockData := map[string][][]types.Datum{ - "tikv_cpu_quota": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0), - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0), - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0), - }, - "tidb_server_maxprocs": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0), - }, - } + mockData := make(map[string][][]types.Datum) ctx := context.WithValue(context.Background(), "__mockMetricsTableData", mockData) ctx = failpoint.WithHook(ctx, func(_ context.Context, fpname string) bool { return fpName == fpname }) + rs, err = tk.Exec("CALIBRATE RESOURCE") + require.NoError(t, err) + require.NotNil(t, rs) + err = rs.Next(ctx, rs.NewChunk(nil)) + // because when mock metrics is empty, error is always `pd unavailable`, don't check detail. + require.ErrorContains(t, err, "There is no CPU quota metrics, query metric error: pd unavailable") + + mockData["tikv_cpu_quota"] = [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0), + } + mockData["tidb_server_maxprocs"] = [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0), + } tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE").Check(testkit.Rows("69768")) tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD TPCC").Check(testkit.Rows("69768")) tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD OLTP_READ_WRITE").Check(testkit.Rows("55823")) @@ -243,6 +249,211 @@ func TestCalibrateResource(t *testing.T) { tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE END_TIME '2020-02-12 10:45:00' START_TIME '2020-02-12 10:35:00'").Check(testkit.Rows("5616")) tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE END_TIME '2020-02-12 10:45:00' DURATION '5m' START_TIME '2020-02-12 10:35:00' ").Check(testkit.Rows("5616")) +<<<<<<< HEAD +======= + // Statistical time points do not correspond + ruModify1 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:25:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:26:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:27:00"), 4.0), + types.MakeDatums(datetime("2020-02-12 10:28:00"), 6.0), + types.MakeDatums(datetime("2020-02-12 10:29:00"), 3.0), + types.MakeDatums(datetime("2020-02-12 10:30:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:31:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:32:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:33:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:34:00"), 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), 2200.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), 2100.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:38:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:39:00"), 2230.0), + types.MakeDatums(datetime("2020-02-12 10:40:00"), 2210.0), + types.MakeDatums(datetime("2020-02-12 10:41:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:42:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:43:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:44:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:45:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:46:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:47:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:48:00"), 8.0), + } + mockData["resource_manager_resource_unit"] = ruModify1 + tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'").Check(testkit.Rows("5616")) + + ruModify2 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:25:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:26:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:27:00"), 4.0), + types.MakeDatums(datetime("2020-02-12 10:28:00"), 6.0), + types.MakeDatums(datetime("2020-02-12 10:29:00"), 2200.0), + types.MakeDatums(datetime("2020-02-12 10:30:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:31:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:32:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:33:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:34:00"), 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), 29.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), 2100.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), 49.0), + types.MakeDatums(datetime("2020-02-12 10:38:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:39:00"), 2230.0), + types.MakeDatums(datetime("2020-02-12 10:40:00"), 2210.0), + types.MakeDatums(datetime("2020-02-12 10:41:00"), 47.0), + types.MakeDatums(datetime("2020-02-12 10:42:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:43:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:44:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:45:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:47:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:49:00"), 2250.0), + } + mockData["resource_manager_resource_unit"] = ruModify2 + cpu2Mofidy := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tidb-0", "tidb", 3.212), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", "tidb", 3.233), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tidb-0", "tidb", 3.213), + types.MakeDatums(datetime("2020-02-12 10:39:00"), "tidb-0", "tidb", 3.209), + types.MakeDatums(datetime("2020-02-12 10:40:00"), "tidb-0", "tidb", 3.213), + types.MakeDatums(datetime("2020-02-12 10:42:00"), "tidb-0", "tidb", 3.228), + types.MakeDatums(datetime("2020-02-12 10:43:00"), "tidb-0", "tidb", 3.219), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tidb-0", "tidb", 3.220), + types.MakeDatums(datetime("2020-02-12 10:45:00"), "tidb-0", "tidb", 3.221), + types.MakeDatums(datetime("2020-02-12 10:46:00"), "tidb-0", "tidb", 3.220), + types.MakeDatums(datetime("2020-02-12 10:47:00"), "tidb-0", "tidb", 3.236), + types.MakeDatums(datetime("2020-02-12 10:48:00"), "tidb-0", "tidb", 3.220), + types.MakeDatums(datetime("2020-02-12 10:49:00"), "tidb-0", "tidb", 3.234), + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-1", "tikv", 2.212), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", "tikv", 2.233), + types.MakeDatums(datetime("2020-02-12 10:49:00"), "tikv-1", "tikv", 2.234), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-1", "tikv", 2.213), + types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-1", "tikv", 2.209), + types.MakeDatums(datetime("2020-02-12 10:46:00"), "tikv-1", "tikv", 3.220), + types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-1", "tikv", 2.213), + types.MakeDatums(datetime("2020-02-12 10:47:00"), "tikv-1", "tikv", 2.236), + types.MakeDatums(datetime("2020-02-12 10:42:00"), "tikv-1", "tikv", 2.228), + types.MakeDatums(datetime("2020-02-12 10:43:00"), "tikv-1", "tikv", 2.219), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-1", "tikv", 2.220), + types.MakeDatums(datetime("2020-02-12 10:45:00"), "tikv-1", "tikv", 2.281), + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-0", "tikv", 2.282), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:49:00"), "tikv-0", "tikv", 2.284), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-0", "tikv", 2.289), + types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:47:00"), "tikv-0", "tikv", 2.286), + types.MakeDatums(datetime("2020-02-12 10:42:00"), "tikv-0", "tikv", 2.288), + types.MakeDatums(datetime("2020-02-12 10:43:00"), "tikv-0", "tikv", 2.289), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-0", "tikv", 2.280), + types.MakeDatums(datetime("2020-02-12 10:45:00"), "tikv-0", "tikv", 2.281), + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-2", "tikv", 2.112), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", "tikv", 2.133), + types.MakeDatums(datetime("2020-02-12 10:49:00"), "tikv-2", "tikv", 2.134), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-2", "tikv", 2.113), + types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-2", "tikv", 2.109), + types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-2", "tikv", 2.113), + types.MakeDatums(datetime("2020-02-12 10:47:00"), "tikv-2", "tikv", 2.136), + types.MakeDatums(datetime("2020-02-12 10:42:00"), "tikv-2", "tikv", 2.128), + types.MakeDatums(datetime("2020-02-12 10:43:00"), "tikv-2", "tikv", 2.119), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-2", "tikv", 2.120), + types.MakeDatums(datetime("2020-02-12 10:45:00"), "tikv-2", "tikv", 2.281), + types.MakeDatums(datetime("2020-02-12 10:48:00"), "tikv-2", "tikv", 3.220), + } + mockData["process_cpu_usage"] = cpu2Mofidy + tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'").Check(testkit.Rows("5616")) + + ruModify3 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:25:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:26:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:27:00"), 4.0), + types.MakeDatums(datetime("2020-02-12 10:28:00"), 6.0), + types.MakeDatums(datetime("2020-02-12 10:29:00"), 2200.0), + types.MakeDatums(datetime("2020-02-12 10:30:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:31:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:32:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:33:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:34:00"), 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), 29.0), + types.MakeDatums(datetime("2020-02-12 10:36:20"), 2100.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), 49.0), + types.MakeDatums(datetime("2020-02-12 10:38:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:39:00"), 2230.0), + types.MakeDatums(datetime("2020-02-12 10:40:00"), 2210.0), + types.MakeDatums(datetime("2020-02-12 10:41:00"), 47.0), + types.MakeDatums(datetime("2020-02-12 10:42:20"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:43:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:44:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:45:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:47:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:49:00"), 2250.0), + } + mockData["resource_manager_resource_unit"] = ruModify3 + // because there are 20s difference in two time points, the result is changed. + tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'").Check(testkit.Rows("5613")) + + ru2 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0), + types.MakeDatums(datetime("2020-02-12 10:26:00"), 2100.0), + types.MakeDatums(datetime("2020-02-12 10:27:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:28:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:29:00"), 2230.0), + types.MakeDatums(datetime("2020-02-12 10:30:00"), 2210.0), + types.MakeDatums(datetime("2020-02-12 10:31:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:32:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:33:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:34:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), 2280.0), + } + mockData["resource_manager_resource_unit"] = ru2 + rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'") + require.NoError(t, err) + require.NotNil(t, rs) + err = rs.Next(ctx, rs.NewChunk(nil)) + require.ErrorContains(t, err, "The workload in selected time window is too low") + + ru3 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0), + types.MakeDatums(datetime("2020-02-12 10:27:00"), 2100.0), + types.MakeDatums(datetime("2020-02-12 10:28:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:30:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:31:00"), 2230.0), + types.MakeDatums(datetime("2020-02-12 10:33:00"), 2210.0), + types.MakeDatums(datetime("2020-02-12 10:34:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:39:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:40:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:42:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:43:00"), 2280.0), + } + mockData["resource_manager_resource_unit"] = ru3 + cpu3 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:26:00"), "tidb-0", "tidb", 3.212), + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tidb-0", "tidb", 3.233), + types.MakeDatums(datetime("2020-02-12 10:32:00"), "tidb-0", "tidb", 3.213), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", "tidb", 3.209), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tidb-0", "tidb", 3.213), + types.MakeDatums(datetime("2020-02-12 10:41:00"), "tidb-0", "tidb", 3.228), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tidb-0", "tidb", 3.219), + + types.MakeDatums(datetime("2020-02-12 10:26:00"), "tikv-0", "tikv", 2.282), + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:32:00"), "tikv-0", "tikv", 2.284), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0", "tikv", 2.289), + types.MakeDatums(datetime("2020-02-12 10:41:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-0", "tikv", 2.286), + } + mockData["process_cpu_usage"] = cpu3 + rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'") + require.NoError(t, err) + require.NotNil(t, rs) + err = rs.Next(ctx, rs.NewChunk(nil)) + require.ErrorContains(t, err, "The workload in selected time window is too low") + + // flash back to init data. + mockData["resource_manager_resource_unit"] = ru1 + mockData["process_cpu_usage"] = cpu2 + +>>>>>>> 841aed8d95a (calibrate: refactor metrics error (#44451)) rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00'") require.NoError(t, err) require.NotNil(t, rs) @@ -348,7 +559,14 @@ func TestCalibrateResource(t *testing.T) { require.NoError(t, err) require.NotNil(t, rs) err = rs.Next(ctx, rs.NewChunk(nil)) - require.ErrorContains(t, err, "There are too few metrics points available in selected time window") + require.ErrorContains(t, err, "The workload in selected time window is too low") + + delete(mockData, "process_cpu_usage") + rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' END_TIME '2020-02-12 10:45:00'") + require.NoError(t, err) + require.NotNil(t, rs) + err = rs.Next(ctx, rs.NewChunk(nil)) + require.ErrorContains(t, err, "query metric error: pd unavailable") } type mockResourceGroupProvider struct { From ba851d500e52c57379cc8f231a0b7a649ba0f21b Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Wed, 5 Jul 2023 16:13:37 +0800 Subject: [PATCH 2/4] Revert "This is an automated cherry-pick of #44451" This reverts commit 3736109a24383ffa45504fc57b5bf8033c47cf21. Signed-off-by: Cabinfever_B --- executor/calibrate_resource.go | 24 +-- executor/calibrate_resource_test.go | 248 ++-------------------------- 2 files changed, 20 insertions(+), 252 deletions(-) diff --git a/executor/calibrate_resource.go b/executor/calibrate_resource.go index 2bb353ceb9d92..55081c4e5d192 100644 --- a/executor/calibrate_resource.go +++ b/executor/calibrate_resource.go @@ -195,11 +195,6 @@ func (e *calibrateResourceExec) Next(ctx context.Context, req *chunk.Chunk) erro return e.staticCalibrate(ctx, req, exec) } -var ( - errLowUsage = errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead") - errNoCPUQuotaMetrics = errors.Normalize("There is no CPU quota metrics, %v") -) - func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk.Chunk, exec sqlexec.RestrictedSQLExecutor) error { startTs, endTs, err := e.parseCalibrateDuration() if err != nil { @@ -210,11 +205,11 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec) if err != nil { - return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) + return err } totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec) if err != nil { - return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) + return err } rus, err := getRUPerSec(ctx, exec, startTime, endTime) if err != nil { @@ -246,9 +241,8 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk } } if len(quotas) < 5 { - return errLowUsage + return errors.Errorf("There are too few metrics points available in selected time window") } -<<<<<<< HEAD if float64(len(quotas))/float64(len(quotas)+lowCount) > percentOfPass { sort.Slice(quotas, func(i, j int) bool { return quotas[i] > quotas[j] @@ -263,10 +257,6 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk req.AppendUint64(0, uint64(quota)) } else { return errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead") -======= - if float64(len(quotas))/float64(len(quotas)+lowCount) <= percentOfPass { - return errLowUsage ->>>>>>> 841aed8d95a (calibrate: refactor metrics error (#44451)) } return nil } @@ -282,11 +272,11 @@ func (e *calibrateResourceExec) staticCalibrate(ctx context.Context, req *chunk. totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec) if err != nil { - return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) + return err } totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec) if err != nil { - return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) + return err } // The default workload to calculate the RU capacity. @@ -349,14 +339,10 @@ func getValuesFromMetrics(ctx context.Context, exec sqlexec.RestrictedSQLExecuto if err != nil { return nil, errors.Trace(err) } -<<<<<<< HEAD if len(rows) == 0 { return nil, errors.Errorf("metrics '%s' is empty", metrics) } ret := make([]float64, 0, len(rows)) -======= - ret := make([]*timePointValue, 0, len(rows)) ->>>>>>> 841aed8d95a (calibrate: refactor metrics error (#44451)) for _, row := range rows { ret = append(ret, row.GetFloat64(0)) } diff --git a/executor/calibrate_resource_test.go b/executor/calibrate_resource_test.go index c90cfa50820a2..874eca8ddae38 100644 --- a/executor/calibrate_resource_test.go +++ b/executor/calibrate_resource_test.go @@ -95,30 +95,24 @@ func TestCalibrateResource(t *testing.T) { return time } - mockData := make(map[string][][]types.Datum) + mockData := map[string][][]types.Datum{ + "tikv_cpu_quota": { + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0), + }, + "tidb_server_maxprocs": { + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0), + }, + } ctx := context.WithValue(context.Background(), "__mockMetricsTableData", mockData) ctx = failpoint.WithHook(ctx, func(_ context.Context, fpname string) bool { return fpName == fpname }) - rs, err = tk.Exec("CALIBRATE RESOURCE") - require.NoError(t, err) - require.NotNil(t, rs) - err = rs.Next(ctx, rs.NewChunk(nil)) - // because when mock metrics is empty, error is always `pd unavailable`, don't check detail. - require.ErrorContains(t, err, "There is no CPU quota metrics, query metric error: pd unavailable") - - mockData["tikv_cpu_quota"] = [][]types.Datum{ - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0), - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0), - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0), - } - mockData["tidb_server_maxprocs"] = [][]types.Datum{ - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0), - } tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE").Check(testkit.Rows("69768")) tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD TPCC").Check(testkit.Rows("69768")) tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD OLTP_READ_WRITE").Check(testkit.Rows("55823")) @@ -249,211 +243,6 @@ func TestCalibrateResource(t *testing.T) { tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE END_TIME '2020-02-12 10:45:00' START_TIME '2020-02-12 10:35:00'").Check(testkit.Rows("5616")) tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE END_TIME '2020-02-12 10:45:00' DURATION '5m' START_TIME '2020-02-12 10:35:00' ").Check(testkit.Rows("5616")) -<<<<<<< HEAD -======= - // Statistical time points do not correspond - ruModify1 := [][]types.Datum{ - types.MakeDatums(datetime("2020-02-12 10:25:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:26:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:27:00"), 4.0), - types.MakeDatums(datetime("2020-02-12 10:28:00"), 6.0), - types.MakeDatums(datetime("2020-02-12 10:29:00"), 3.0), - types.MakeDatums(datetime("2020-02-12 10:30:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:31:00"), 7.0), - types.MakeDatums(datetime("2020-02-12 10:32:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:33:00"), 7.0), - types.MakeDatums(datetime("2020-02-12 10:34:00"), 8.0), - types.MakeDatums(datetime("2020-02-12 10:35:00"), 2200.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), 2100.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), 2250.0), - types.MakeDatums(datetime("2020-02-12 10:38:00"), 2300.0), - types.MakeDatums(datetime("2020-02-12 10:39:00"), 2230.0), - types.MakeDatums(datetime("2020-02-12 10:40:00"), 2210.0), - types.MakeDatums(datetime("2020-02-12 10:41:00"), 2250.0), - types.MakeDatums(datetime("2020-02-12 10:42:00"), 2330.0), - types.MakeDatums(datetime("2020-02-12 10:43:00"), 2330.0), - types.MakeDatums(datetime("2020-02-12 10:44:00"), 2300.0), - types.MakeDatums(datetime("2020-02-12 10:45:00"), 2280.0), - types.MakeDatums(datetime("2020-02-12 10:46:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:47:00"), 7.0), - types.MakeDatums(datetime("2020-02-12 10:48:00"), 8.0), - } - mockData["resource_manager_resource_unit"] = ruModify1 - tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'").Check(testkit.Rows("5616")) - - ruModify2 := [][]types.Datum{ - types.MakeDatums(datetime("2020-02-12 10:25:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:26:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:27:00"), 4.0), - types.MakeDatums(datetime("2020-02-12 10:28:00"), 6.0), - types.MakeDatums(datetime("2020-02-12 10:29:00"), 2200.0), - types.MakeDatums(datetime("2020-02-12 10:30:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:31:00"), 7.0), - types.MakeDatums(datetime("2020-02-12 10:32:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:33:00"), 7.0), - types.MakeDatums(datetime("2020-02-12 10:34:00"), 8.0), - types.MakeDatums(datetime("2020-02-12 10:35:00"), 29.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), 2100.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), 49.0), - types.MakeDatums(datetime("2020-02-12 10:38:00"), 2300.0), - types.MakeDatums(datetime("2020-02-12 10:39:00"), 2230.0), - types.MakeDatums(datetime("2020-02-12 10:40:00"), 2210.0), - types.MakeDatums(datetime("2020-02-12 10:41:00"), 47.0), - types.MakeDatums(datetime("2020-02-12 10:42:00"), 2330.0), - types.MakeDatums(datetime("2020-02-12 10:43:00"), 2330.0), - types.MakeDatums(datetime("2020-02-12 10:44:00"), 2300.0), - types.MakeDatums(datetime("2020-02-12 10:45:00"), 2280.0), - types.MakeDatums(datetime("2020-02-12 10:47:00"), 2250.0), - types.MakeDatums(datetime("2020-02-12 10:49:00"), 2250.0), - } - mockData["resource_manager_resource_unit"] = ruModify2 - cpu2Mofidy := [][]types.Datum{ - types.MakeDatums(datetime("2020-02-12 10:29:00"), "tidb-0", "tidb", 3.212), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", "tidb", 3.233), - types.MakeDatums(datetime("2020-02-12 10:38:00"), "tidb-0", "tidb", 3.213), - types.MakeDatums(datetime("2020-02-12 10:39:00"), "tidb-0", "tidb", 3.209), - types.MakeDatums(datetime("2020-02-12 10:40:00"), "tidb-0", "tidb", 3.213), - types.MakeDatums(datetime("2020-02-12 10:42:00"), "tidb-0", "tidb", 3.228), - types.MakeDatums(datetime("2020-02-12 10:43:00"), "tidb-0", "tidb", 3.219), - types.MakeDatums(datetime("2020-02-12 10:44:00"), "tidb-0", "tidb", 3.220), - types.MakeDatums(datetime("2020-02-12 10:45:00"), "tidb-0", "tidb", 3.221), - types.MakeDatums(datetime("2020-02-12 10:46:00"), "tidb-0", "tidb", 3.220), - types.MakeDatums(datetime("2020-02-12 10:47:00"), "tidb-0", "tidb", 3.236), - types.MakeDatums(datetime("2020-02-12 10:48:00"), "tidb-0", "tidb", 3.220), - types.MakeDatums(datetime("2020-02-12 10:49:00"), "tidb-0", "tidb", 3.234), - types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-1", "tikv", 2.212), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", "tikv", 2.233), - types.MakeDatums(datetime("2020-02-12 10:49:00"), "tikv-1", "tikv", 2.234), - types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-1", "tikv", 2.213), - types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-1", "tikv", 2.209), - types.MakeDatums(datetime("2020-02-12 10:46:00"), "tikv-1", "tikv", 3.220), - types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-1", "tikv", 2.213), - types.MakeDatums(datetime("2020-02-12 10:47:00"), "tikv-1", "tikv", 2.236), - types.MakeDatums(datetime("2020-02-12 10:42:00"), "tikv-1", "tikv", 2.228), - types.MakeDatums(datetime("2020-02-12 10:43:00"), "tikv-1", "tikv", 2.219), - types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-1", "tikv", 2.220), - types.MakeDatums(datetime("2020-02-12 10:45:00"), "tikv-1", "tikv", 2.281), - types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-0", "tikv", 2.282), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", "tikv", 2.283), - types.MakeDatums(datetime("2020-02-12 10:49:00"), "tikv-0", "tikv", 2.284), - types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0", "tikv", 2.283), - types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-0", "tikv", 2.289), - types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-0", "tikv", 2.283), - types.MakeDatums(datetime("2020-02-12 10:47:00"), "tikv-0", "tikv", 2.286), - types.MakeDatums(datetime("2020-02-12 10:42:00"), "tikv-0", "tikv", 2.288), - types.MakeDatums(datetime("2020-02-12 10:43:00"), "tikv-0", "tikv", 2.289), - types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-0", "tikv", 2.280), - types.MakeDatums(datetime("2020-02-12 10:45:00"), "tikv-0", "tikv", 2.281), - types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-2", "tikv", 2.112), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", "tikv", 2.133), - types.MakeDatums(datetime("2020-02-12 10:49:00"), "tikv-2", "tikv", 2.134), - types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-2", "tikv", 2.113), - types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-2", "tikv", 2.109), - types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-2", "tikv", 2.113), - types.MakeDatums(datetime("2020-02-12 10:47:00"), "tikv-2", "tikv", 2.136), - types.MakeDatums(datetime("2020-02-12 10:42:00"), "tikv-2", "tikv", 2.128), - types.MakeDatums(datetime("2020-02-12 10:43:00"), "tikv-2", "tikv", 2.119), - types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-2", "tikv", 2.120), - types.MakeDatums(datetime("2020-02-12 10:45:00"), "tikv-2", "tikv", 2.281), - types.MakeDatums(datetime("2020-02-12 10:48:00"), "tikv-2", "tikv", 3.220), - } - mockData["process_cpu_usage"] = cpu2Mofidy - tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'").Check(testkit.Rows("5616")) - - ruModify3 := [][]types.Datum{ - types.MakeDatums(datetime("2020-02-12 10:25:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:26:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:27:00"), 4.0), - types.MakeDatums(datetime("2020-02-12 10:28:00"), 6.0), - types.MakeDatums(datetime("2020-02-12 10:29:00"), 2200.0), - types.MakeDatums(datetime("2020-02-12 10:30:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:31:00"), 7.0), - types.MakeDatums(datetime("2020-02-12 10:32:00"), 5.0), - types.MakeDatums(datetime("2020-02-12 10:33:00"), 7.0), - types.MakeDatums(datetime("2020-02-12 10:34:00"), 8.0), - types.MakeDatums(datetime("2020-02-12 10:35:00"), 29.0), - types.MakeDatums(datetime("2020-02-12 10:36:20"), 2100.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), 49.0), - types.MakeDatums(datetime("2020-02-12 10:38:00"), 2300.0), - types.MakeDatums(datetime("2020-02-12 10:39:00"), 2230.0), - types.MakeDatums(datetime("2020-02-12 10:40:00"), 2210.0), - types.MakeDatums(datetime("2020-02-12 10:41:00"), 47.0), - types.MakeDatums(datetime("2020-02-12 10:42:20"), 2330.0), - types.MakeDatums(datetime("2020-02-12 10:43:00"), 2330.0), - types.MakeDatums(datetime("2020-02-12 10:44:00"), 2300.0), - types.MakeDatums(datetime("2020-02-12 10:45:00"), 2280.0), - types.MakeDatums(datetime("2020-02-12 10:47:00"), 2250.0), - types.MakeDatums(datetime("2020-02-12 10:49:00"), 2250.0), - } - mockData["resource_manager_resource_unit"] = ruModify3 - // because there are 20s difference in two time points, the result is changed. - tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'").Check(testkit.Rows("5613")) - - ru2 := [][]types.Datum{ - types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0), - types.MakeDatums(datetime("2020-02-12 10:26:00"), 2100.0), - types.MakeDatums(datetime("2020-02-12 10:27:00"), 2250.0), - types.MakeDatums(datetime("2020-02-12 10:28:00"), 2300.0), - types.MakeDatums(datetime("2020-02-12 10:29:00"), 2230.0), - types.MakeDatums(datetime("2020-02-12 10:30:00"), 2210.0), - types.MakeDatums(datetime("2020-02-12 10:31:00"), 2250.0), - types.MakeDatums(datetime("2020-02-12 10:32:00"), 2330.0), - types.MakeDatums(datetime("2020-02-12 10:33:00"), 2330.0), - types.MakeDatums(datetime("2020-02-12 10:34:00"), 2300.0), - types.MakeDatums(datetime("2020-02-12 10:35:00"), 2280.0), - } - mockData["resource_manager_resource_unit"] = ru2 - rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'") - require.NoError(t, err) - require.NotNil(t, rs) - err = rs.Next(ctx, rs.NewChunk(nil)) - require.ErrorContains(t, err, "The workload in selected time window is too low") - - ru3 := [][]types.Datum{ - types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0), - types.MakeDatums(datetime("2020-02-12 10:27:00"), 2100.0), - types.MakeDatums(datetime("2020-02-12 10:28:00"), 2250.0), - types.MakeDatums(datetime("2020-02-12 10:30:00"), 2300.0), - types.MakeDatums(datetime("2020-02-12 10:31:00"), 2230.0), - types.MakeDatums(datetime("2020-02-12 10:33:00"), 2210.0), - types.MakeDatums(datetime("2020-02-12 10:34:00"), 2250.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), 2330.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), 2330.0), - types.MakeDatums(datetime("2020-02-12 10:39:00"), 2280.0), - types.MakeDatums(datetime("2020-02-12 10:40:00"), 2280.0), - types.MakeDatums(datetime("2020-02-12 10:42:00"), 2280.0), - types.MakeDatums(datetime("2020-02-12 10:43:00"), 2280.0), - } - mockData["resource_manager_resource_unit"] = ru3 - cpu3 := [][]types.Datum{ - types.MakeDatums(datetime("2020-02-12 10:26:00"), "tidb-0", "tidb", 3.212), - types.MakeDatums(datetime("2020-02-12 10:29:00"), "tidb-0", "tidb", 3.233), - types.MakeDatums(datetime("2020-02-12 10:32:00"), "tidb-0", "tidb", 3.213), - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", "tidb", 3.209), - types.MakeDatums(datetime("2020-02-12 10:38:00"), "tidb-0", "tidb", 3.213), - types.MakeDatums(datetime("2020-02-12 10:41:00"), "tidb-0", "tidb", 3.228), - types.MakeDatums(datetime("2020-02-12 10:44:00"), "tidb-0", "tidb", 3.219), - - types.MakeDatums(datetime("2020-02-12 10:26:00"), "tikv-0", "tikv", 2.282), - types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-0", "tikv", 2.283), - types.MakeDatums(datetime("2020-02-12 10:32:00"), "tikv-0", "tikv", 2.284), - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", "tikv", 2.283), - types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0", "tikv", 2.289), - types.MakeDatums(datetime("2020-02-12 10:41:00"), "tikv-0", "tikv", 2.283), - types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-0", "tikv", 2.286), - } - mockData["process_cpu_usage"] = cpu3 - rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'") - require.NoError(t, err) - require.NotNil(t, rs) - err = rs.Next(ctx, rs.NewChunk(nil)) - require.ErrorContains(t, err, "The workload in selected time window is too low") - - // flash back to init data. - mockData["resource_manager_resource_unit"] = ru1 - mockData["process_cpu_usage"] = cpu2 - ->>>>>>> 841aed8d95a (calibrate: refactor metrics error (#44451)) rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00'") require.NoError(t, err) require.NotNil(t, rs) @@ -559,14 +348,7 @@ func TestCalibrateResource(t *testing.T) { require.NoError(t, err) require.NotNil(t, rs) err = rs.Next(ctx, rs.NewChunk(nil)) - require.ErrorContains(t, err, "The workload in selected time window is too low") - - delete(mockData, "process_cpu_usage") - rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' END_TIME '2020-02-12 10:45:00'") - require.NoError(t, err) - require.NotNil(t, rs) - err = rs.Next(ctx, rs.NewChunk(nil)) - require.ErrorContains(t, err, "query metric error: pd unavailable") + require.ErrorContains(t, err, "There are too few metrics points available in selected time window") } type mockResourceGroupProvider struct { From 370db24082c0a67aa0ecfc1ad6939e8e93a3cd69 Mon Sep 17 00:00:00 2001 From: Yongbo Jiang Date: Tue, 25 Apr 2023 20:11:56 +0800 Subject: [PATCH 3/4] resource_control: make metrics time point match in dynamic calibrate (#43248) ref pingcap/tidb#43212 --- executor/calibrate_resource.go | 93 +++++++++--- executor/calibrate_resource_test.go | 211 +++++++++++++++++++++++++++- 2 files changed, 283 insertions(+), 21 deletions(-) diff --git a/executor/calibrate_resource.go b/executor/calibrate_resource.go index 55081c4e5d192..3e7fa2265c8f1 100644 --- a/executor/calibrate_resource.go +++ b/executor/calibrate_resource.go @@ -27,6 +27,7 @@ import ( "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/parser/ast" "github.com/pingcap/tidb/parser/duration" + "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/sessiontxn/staleread" "github.com/pingcap/tidb/util/chunk" @@ -211,34 +212,48 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk if err != nil { return err } - rus, err := getRUPerSec(ctx, exec, startTime, endTime) + rus, err := getRUPerSec(ctx, e.ctx, exec, startTime, endTime) if err != nil { return err } - tikvCPUs, err := getComponentCPUUsagePerSec(ctx, exec, "tikv", startTime, endTime) + tikvCPUs, err := getComponentCPUUsagePerSec(ctx, e.ctx, exec, "tikv", startTime, endTime) if err != nil { return err } - tidbCPUs, err := getComponentCPUUsagePerSec(ctx, exec, "tidb", startTime, endTime) + tidbCPUs, err := getComponentCPUUsagePerSec(ctx, e.ctx, exec, "tidb", startTime, endTime) if err != nil { return err } quotas := make([]float64, 0) lowCount := 0 - for idx, ru := range rus { - if idx >= len(tikvCPUs) || idx >= len(tidbCPUs) { + for { + if rus.isEnd() || tikvCPUs.isEnd() || tidbCPUs.isEnd() { break } - tikvQuota, tidbQuota := tikvCPUs[idx]/totalKVCPUQuota, tidbCPUs[idx]/totalTiDBCPU + // make time point match + maxTime := rus.getTime() + if tikvCPUs.getTime().After(maxTime) { + maxTime = tikvCPUs.getTime() + } + if tidbCPUs.getTime().After(maxTime) { + maxTime = tidbCPUs.getTime() + } + if !rus.advance(maxTime) || !tikvCPUs.advance(maxTime) || !tidbCPUs.advance(maxTime) { + continue + } + tikvQuota, tidbQuota := tikvCPUs.getValue()/totalKVCPUQuota, tidbCPUs.getValue()/totalTiDBCPU // If one of the two cpu usage is greater than the `valuableUsageThreshold`, we can accept it. // And if both are greater than the `lowUsageThreshold`, we can also accept it. if tikvQuota > valuableUsageThreshold || tidbQuota > valuableUsageThreshold { - quotas = append(quotas, ru/mathutil.Max(tikvQuota, tidbQuota)) + quotas = append(quotas, rus.getValue()/mathutil.Max(tikvQuota, tidbQuota)) } else if tikvQuota < lowUsageThreshold || tidbQuota < lowUsageThreshold { lowCount++ } else { - quotas = append(quotas, ru/mathutil.Max(tikvQuota, tidbQuota)) + quotas = append(quotas, rus.getValue()/mathutil.Max(tikvQuota, tidbQuota)) } + rus.next() + tidbCPUs.next() + tikvCPUs.next() } if len(quotas) < 5 { return errors.Errorf("There are too few metrics points available in selected time window") @@ -312,14 +327,51 @@ func getTiDBTotalCPUQuota(ctx context.Context, exec sqlexec.RestrictedSQLExecuto return getNumberFromMetrics(ctx, exec, query, "tidb_server_maxprocs") } -func getRUPerSec(ctx context.Context, exec sqlexec.RestrictedSQLExecutor, startTime, endTime string) ([]float64, error) { - query := fmt.Sprintf("SELECT value FROM METRICS_SCHEMA.resource_manager_resource_unit where time >= '%s' and time <= '%s' ORDER BY time desc", startTime, endTime) - return getValuesFromMetrics(ctx, exec, query, "resource_manager_resource_unit") +type timePointValue struct { + tp time.Time + val float64 +} + +type timeSeriesValues struct { + idx int + vals []*timePointValue +} + +func (t *timeSeriesValues) isEnd() bool { + return t.idx >= len(t.vals) +} + +func (t *timeSeriesValues) next() { + t.idx++ +} + +func (t *timeSeriesValues) getTime() time.Time { + return t.vals[t.idx].tp +} + +func (t *timeSeriesValues) getValue() float64 { + return t.vals[t.idx].val +} + +func (t *timeSeriesValues) advance(target time.Time) bool { + for ; t.idx < len(t.vals); t.idx++ { + // `target` is maximal time in other timeSeriesValues, + // so we should find the time which offset is less than 10s. + if t.vals[t.idx].tp.Add(time.Second * 10).After(target) { + return t.vals[t.idx].tp.Add(-time.Second * 10).Before(target) + } + } + return false } -func getComponentCPUUsagePerSec(ctx context.Context, exec sqlexec.RestrictedSQLExecutor, component, startTime, endTime string) ([]float64, error) { - query := fmt.Sprintf("SELECT sum(value) FROM METRICS_SCHEMA.process_cpu_usage where time >= '%s' and time <= '%s' and job like '%%%s' GROUP BY time ORDER BY time desc", startTime, endTime, component) - return getValuesFromMetrics(ctx, exec, query, "process_cpu_usage") +func getRUPerSec(ctx context.Context, sctx sessionctx.Context, exec sqlexec.RestrictedSQLExecutor, startTime, endTime string) (*timeSeriesValues, error) { + query := fmt.Sprintf("SELECT time, value FROM METRICS_SCHEMA.resource_manager_resource_unit where time >= '%s' and time <= '%s' ORDER BY time asc", startTime, endTime) + return getValuesFromMetrics(ctx, sctx, exec, query, "resource_manager_resource_unit") +} + +func getComponentCPUUsagePerSec(ctx context.Context, sctx sessionctx.Context, exec sqlexec.RestrictedSQLExecutor, component, startTime, endTime string) (*timeSeriesValues, error) { + query := fmt.Sprintf("SELECT time, sum(value) FROM METRICS_SCHEMA.process_cpu_usage where time >= '%s' and time <= '%s' and job like '%%%s' GROUP BY time ORDER BY time asc", startTime, endTime, component) + return getValuesFromMetrics(ctx, sctx, exec, query, "process_cpu_usage") } func getNumberFromMetrics(ctx context.Context, exec sqlexec.RestrictedSQLExecutor, query, metrics string) (float64, error) { @@ -334,7 +386,7 @@ func getNumberFromMetrics(ctx context.Context, exec sqlexec.RestrictedSQLExecuto return rows[0].GetFloat64(0), nil } -func getValuesFromMetrics(ctx context.Context, exec sqlexec.RestrictedSQLExecutor, query, metrics string) ([]float64, error) { +func getValuesFromMetrics(ctx context.Context, sctx sessionctx.Context, exec sqlexec.RestrictedSQLExecutor, query, metrics string) (*timeSeriesValues, error) { rows, _, err := exec.ExecRestrictedSQL(ctx, []sqlexec.OptionFuncAlias{sqlexec.ExecOptionUseCurSession}, query) if err != nil { return nil, errors.Trace(err) @@ -342,9 +394,14 @@ func getValuesFromMetrics(ctx context.Context, exec sqlexec.RestrictedSQLExecuto if len(rows) == 0 { return nil, errors.Errorf("metrics '%s' is empty", metrics) } - ret := make([]float64, 0, len(rows)) + ret := make([]*timePointValue, 0, len(rows)) for _, row := range rows { - ret = append(ret, row.GetFloat64(0)) + if tp, err := row.GetTime(0).AdjustedGoTime(sctx.GetSessionVars().Location()); err == nil { + ret = append(ret, &timePointValue{ + tp: tp, + val: row.GetFloat64(1), + }) + } } - return ret, nil + return &timeSeriesValues{idx: 0, vals: ret}, nil } diff --git a/executor/calibrate_resource_test.go b/executor/calibrate_resource_test.go index 874eca8ddae38..f4850f9d056c8 100644 --- a/executor/calibrate_resource_test.go +++ b/executor/calibrate_resource_test.go @@ -126,7 +126,7 @@ func TestCalibrateResource(t *testing.T) { tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE").Check(testkit.Rows("38760")) // construct data for dynamic calibrate - mockData["resource_manager_resource_unit"] = [][]types.Datum{ + ru1 := [][]types.Datum{ types.MakeDatums(datetime("2020-02-12 10:35:00"), 2200.0), types.MakeDatums(datetime("2020-02-12 10:36:00"), 2100.0), types.MakeDatums(datetime("2020-02-12 10:37:00"), 2250.0), @@ -139,8 +139,9 @@ func TestCalibrateResource(t *testing.T) { types.MakeDatums(datetime("2020-02-12 10:44:00"), 2300.0), types.MakeDatums(datetime("2020-02-12 10:45:00"), 2280.0), } + mockData["resource_manager_resource_unit"] = ru1 - mockData["process_cpu_usage"] = [][]types.Datum{ + cpu1 := [][]types.Datum{ types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", "tidb", 1.212), types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", "tidb", 1.233), types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-0", "tidb", 1.234), @@ -186,11 +187,12 @@ func TestCalibrateResource(t *testing.T) { types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-2", "tikv", 2.120), types.MakeDatums(datetime("2020-02-12 10:45:00"), "tikv-2", "tikv", 2.281), } + mockData["process_cpu_usage"] = cpu1 tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' DURATION '10m'").Check(testkit.Rows("8161")) tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' END_TIME '2020-02-12 10:45:00'").Check(testkit.Rows("8161")) - mockData["process_cpu_usage"] = [][]types.Datum{ + cpu2 := [][]types.Datum{ types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", "tidb", 3.212), types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", "tidb", 3.233), types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-0", "tidb", 3.234), @@ -236,6 +238,7 @@ func TestCalibrateResource(t *testing.T) { types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-2", "tikv", 2.120), types.MakeDatums(datetime("2020-02-12 10:45:00"), "tikv-2", "tikv", 2.281), } + mockData["process_cpu_usage"] = cpu2 tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' DURATION '10m'").Check(testkit.Rows("5616")) tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' END_TIME '2020-02-12 10:45:00'").Check(testkit.Rows("5616")) @@ -243,6 +246,208 @@ func TestCalibrateResource(t *testing.T) { tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE END_TIME '2020-02-12 10:45:00' START_TIME '2020-02-12 10:35:00'").Check(testkit.Rows("5616")) tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE END_TIME '2020-02-12 10:45:00' DURATION '5m' START_TIME '2020-02-12 10:35:00' ").Check(testkit.Rows("5616")) + // Statistical time points do not correspond + ruModify1 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:25:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:26:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:27:00"), 4.0), + types.MakeDatums(datetime("2020-02-12 10:28:00"), 6.0), + types.MakeDatums(datetime("2020-02-12 10:29:00"), 3.0), + types.MakeDatums(datetime("2020-02-12 10:30:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:31:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:32:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:33:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:34:00"), 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), 2200.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), 2100.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:38:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:39:00"), 2230.0), + types.MakeDatums(datetime("2020-02-12 10:40:00"), 2210.0), + types.MakeDatums(datetime("2020-02-12 10:41:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:42:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:43:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:44:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:45:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:46:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:47:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:48:00"), 8.0), + } + mockData["resource_manager_resource_unit"] = ruModify1 + tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'").Check(testkit.Rows("5616")) + + ruModify2 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:25:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:26:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:27:00"), 4.0), + types.MakeDatums(datetime("2020-02-12 10:28:00"), 6.0), + types.MakeDatums(datetime("2020-02-12 10:29:00"), 2200.0), + types.MakeDatums(datetime("2020-02-12 10:30:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:31:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:32:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:33:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:34:00"), 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), 29.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), 2100.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), 49.0), + types.MakeDatums(datetime("2020-02-12 10:38:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:39:00"), 2230.0), + types.MakeDatums(datetime("2020-02-12 10:40:00"), 2210.0), + types.MakeDatums(datetime("2020-02-12 10:41:00"), 47.0), + types.MakeDatums(datetime("2020-02-12 10:42:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:43:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:44:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:45:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:47:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:49:00"), 2250.0), + } + mockData["resource_manager_resource_unit"] = ruModify2 + cpu2Mofidy := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tidb-0", "tidb", 3.212), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", "tidb", 3.233), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tidb-0", "tidb", 3.213), + types.MakeDatums(datetime("2020-02-12 10:39:00"), "tidb-0", "tidb", 3.209), + types.MakeDatums(datetime("2020-02-12 10:40:00"), "tidb-0", "tidb", 3.213), + types.MakeDatums(datetime("2020-02-12 10:42:00"), "tidb-0", "tidb", 3.228), + types.MakeDatums(datetime("2020-02-12 10:43:00"), "tidb-0", "tidb", 3.219), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tidb-0", "tidb", 3.220), + types.MakeDatums(datetime("2020-02-12 10:45:00"), "tidb-0", "tidb", 3.221), + types.MakeDatums(datetime("2020-02-12 10:46:00"), "tidb-0", "tidb", 3.220), + types.MakeDatums(datetime("2020-02-12 10:47:00"), "tidb-0", "tidb", 3.236), + types.MakeDatums(datetime("2020-02-12 10:48:00"), "tidb-0", "tidb", 3.220), + types.MakeDatums(datetime("2020-02-12 10:49:00"), "tidb-0", "tidb", 3.234), + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-1", "tikv", 2.212), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", "tikv", 2.233), + types.MakeDatums(datetime("2020-02-12 10:49:00"), "tikv-1", "tikv", 2.234), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-1", "tikv", 2.213), + types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-1", "tikv", 2.209), + types.MakeDatums(datetime("2020-02-12 10:46:00"), "tikv-1", "tikv", 3.220), + types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-1", "tikv", 2.213), + types.MakeDatums(datetime("2020-02-12 10:47:00"), "tikv-1", "tikv", 2.236), + types.MakeDatums(datetime("2020-02-12 10:42:00"), "tikv-1", "tikv", 2.228), + types.MakeDatums(datetime("2020-02-12 10:43:00"), "tikv-1", "tikv", 2.219), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-1", "tikv", 2.220), + types.MakeDatums(datetime("2020-02-12 10:45:00"), "tikv-1", "tikv", 2.281), + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-0", "tikv", 2.282), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:49:00"), "tikv-0", "tikv", 2.284), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-0", "tikv", 2.289), + types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:47:00"), "tikv-0", "tikv", 2.286), + types.MakeDatums(datetime("2020-02-12 10:42:00"), "tikv-0", "tikv", 2.288), + types.MakeDatums(datetime("2020-02-12 10:43:00"), "tikv-0", "tikv", 2.289), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-0", "tikv", 2.280), + types.MakeDatums(datetime("2020-02-12 10:45:00"), "tikv-0", "tikv", 2.281), + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-2", "tikv", 2.112), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", "tikv", 2.133), + types.MakeDatums(datetime("2020-02-12 10:49:00"), "tikv-2", "tikv", 2.134), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-2", "tikv", 2.113), + types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-2", "tikv", 2.109), + types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-2", "tikv", 2.113), + types.MakeDatums(datetime("2020-02-12 10:47:00"), "tikv-2", "tikv", 2.136), + types.MakeDatums(datetime("2020-02-12 10:42:00"), "tikv-2", "tikv", 2.128), + types.MakeDatums(datetime("2020-02-12 10:43:00"), "tikv-2", "tikv", 2.119), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-2", "tikv", 2.120), + types.MakeDatums(datetime("2020-02-12 10:45:00"), "tikv-2", "tikv", 2.281), + types.MakeDatums(datetime("2020-02-12 10:48:00"), "tikv-2", "tikv", 3.220), + } + mockData["process_cpu_usage"] = cpu2Mofidy + tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'").Check(testkit.Rows("5616")) + + ruModify3 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:25:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:26:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:27:00"), 4.0), + types.MakeDatums(datetime("2020-02-12 10:28:00"), 6.0), + types.MakeDatums(datetime("2020-02-12 10:29:00"), 2200.0), + types.MakeDatums(datetime("2020-02-12 10:30:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:31:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:32:00"), 5.0), + types.MakeDatums(datetime("2020-02-12 10:33:00"), 7.0), + types.MakeDatums(datetime("2020-02-12 10:34:00"), 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), 29.0), + types.MakeDatums(datetime("2020-02-12 10:36:20"), 2100.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), 49.0), + types.MakeDatums(datetime("2020-02-12 10:38:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:39:00"), 2230.0), + types.MakeDatums(datetime("2020-02-12 10:40:00"), 2210.0), + types.MakeDatums(datetime("2020-02-12 10:41:00"), 47.0), + types.MakeDatums(datetime("2020-02-12 10:42:20"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:43:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:44:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:45:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:47:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:49:00"), 2250.0), + } + mockData["resource_manager_resource_unit"] = ruModify3 + // because there are 20s difference in two time points, the result is changed. + tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'").Check(testkit.Rows("5613")) + + ru2 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0), + types.MakeDatums(datetime("2020-02-12 10:26:00"), 2100.0), + types.MakeDatums(datetime("2020-02-12 10:27:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:28:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:29:00"), 2230.0), + types.MakeDatums(datetime("2020-02-12 10:30:00"), 2210.0), + types.MakeDatums(datetime("2020-02-12 10:31:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:32:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:33:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:34:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), 2280.0), + } + mockData["resource_manager_resource_unit"] = ru2 + rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'") + require.NoError(t, err) + require.NotNil(t, rs) + err = rs.Next(ctx, rs.NewChunk(nil)) + require.ErrorContains(t, err, "There are too few metrics points available in selected time window") + + ru3 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0), + types.MakeDatums(datetime("2020-02-12 10:27:00"), 2100.0), + types.MakeDatums(datetime("2020-02-12 10:28:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:30:00"), 2300.0), + types.MakeDatums(datetime("2020-02-12 10:31:00"), 2230.0), + types.MakeDatums(datetime("2020-02-12 10:33:00"), 2210.0), + types.MakeDatums(datetime("2020-02-12 10:34:00"), 2250.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), 2330.0), + types.MakeDatums(datetime("2020-02-12 10:39:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:40:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:42:00"), 2280.0), + types.MakeDatums(datetime("2020-02-12 10:43:00"), 2280.0), + } + mockData["resource_manager_resource_unit"] = ru3 + cpu3 := [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:26:00"), "tidb-0", "tidb", 3.212), + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tidb-0", "tidb", 3.233), + types.MakeDatums(datetime("2020-02-12 10:32:00"), "tidb-0", "tidb", 3.213), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", "tidb", 3.209), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tidb-0", "tidb", 3.213), + types.MakeDatums(datetime("2020-02-12 10:41:00"), "tidb-0", "tidb", 3.228), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tidb-0", "tidb", 3.219), + + types.MakeDatums(datetime("2020-02-12 10:26:00"), "tikv-0", "tikv", 2.282), + types.MakeDatums(datetime("2020-02-12 10:29:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:32:00"), "tikv-0", "tikv", 2.284), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0", "tikv", 2.289), + types.MakeDatums(datetime("2020-02-12 10:41:00"), "tikv-0", "tikv", 2.283), + types.MakeDatums(datetime("2020-02-12 10:44:00"), "tikv-0", "tikv", 2.286), + } + mockData["process_cpu_usage"] = cpu3 + rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:25:00' DURATION '20m'") + require.NoError(t, err) + require.NotNil(t, rs) + err = rs.Next(ctx, rs.NewChunk(nil)) + require.ErrorContains(t, err, "There are too few metrics points available in selected time window") + + // flash back to init data. + mockData["resource_manager_resource_unit"] = ru1 + mockData["process_cpu_usage"] = cpu2 + rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00'") require.NoError(t, err) require.NotNil(t, rs) From 3e26267b0c0954c424340fcdb79e996cc7853c1a Mon Sep 17 00:00:00 2001 From: Yongbo Jiang Date: Fri, 16 Jun 2023 17:25:10 +0800 Subject: [PATCH 4/4] cherry pick 841aed8 Signed-off-by: Cabinfever_B --- executor/calibrate_resource.go | 45 +++++++++++++-------------- executor/calibrate_resource_test.go | 47 ++++++++++++++++++----------- 2 files changed, 53 insertions(+), 39 deletions(-) diff --git a/executor/calibrate_resource.go b/executor/calibrate_resource.go index 3e7fa2265c8f1..47c2d3cf93b75 100644 --- a/executor/calibrate_resource.go +++ b/executor/calibrate_resource.go @@ -196,6 +196,11 @@ func (e *calibrateResourceExec) Next(ctx context.Context, req *chunk.Chunk) erro return e.staticCalibrate(ctx, req, exec) } +var ( + errLowUsage = errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead") + errNoCPUQuotaMetrics = errors.Normalize("There is no CPU quota metrics, %v") +) + func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk.Chunk, exec sqlexec.RestrictedSQLExecutor) error { startTs, endTs, err := e.parseCalibrateDuration() if err != nil { @@ -206,11 +211,11 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec) if err != nil { - return err + return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) } totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec) if err != nil { - return err + return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) } rus, err := getRUPerSec(ctx, e.ctx, exec, startTime, endTime) if err != nil { @@ -256,23 +261,22 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk tikvCPUs.next() } if len(quotas) < 5 { - return errors.Errorf("There are too few metrics points available in selected time window") + return errLowUsage } - if float64(len(quotas))/float64(len(quotas)+lowCount) > percentOfPass { - sort.Slice(quotas, func(i, j int) bool { - return quotas[i] > quotas[j] - }) - lowerBound := int(math.Round(float64(len(quotas)) * discardRate)) - upperBound := len(quotas) - lowerBound - sum := 0. - for i := lowerBound; i < upperBound; i++ { - sum += quotas[i] - } - quota := sum / float64(upperBound-lowerBound) - req.AppendUint64(0, uint64(quota)) - } else { - return errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead") + if float64(len(quotas))/float64(len(quotas)+lowCount) <= percentOfPass { + return errLowUsage } + sort.Slice(quotas, func(i, j int) bool { + return quotas[i] > quotas[j] + }) + lowerBound := int(math.Round(float64(len(quotas)) * discardRate)) + upperBound := len(quotas) - lowerBound + sum := 0. + for i := lowerBound; i < upperBound; i++ { + sum += quotas[i] + } + quota := sum / float64(upperBound-lowerBound) + req.AppendUint64(0, uint64(quota)) return nil } @@ -287,11 +291,11 @@ func (e *calibrateResourceExec) staticCalibrate(ctx context.Context, req *chunk. totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec) if err != nil { - return err + return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) } totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec) if err != nil { - return err + return errNoCPUQuotaMetrics.FastGenByArgs(err.Error()) } // The default workload to calculate the RU capacity. @@ -391,9 +395,6 @@ func getValuesFromMetrics(ctx context.Context, sctx sessionctx.Context, exec sql if err != nil { return nil, errors.Trace(err) } - if len(rows) == 0 { - return nil, errors.Errorf("metrics '%s' is empty", metrics) - } ret := make([]*timePointValue, 0, len(rows)) for _, row := range rows { if tp, err := row.GetTime(0).AdjustedGoTime(sctx.GetSessionVars().Location()); err == nil { diff --git a/executor/calibrate_resource_test.go b/executor/calibrate_resource_test.go index f4850f9d056c8..21bde73c79029 100644 --- a/executor/calibrate_resource_test.go +++ b/executor/calibrate_resource_test.go @@ -95,24 +95,30 @@ func TestCalibrateResource(t *testing.T) { return time } - mockData := map[string][][]types.Datum{ - "tikv_cpu_quota": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0), - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0), - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0), - }, - "tidb_server_maxprocs": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0), - }, - } + mockData := make(map[string][][]types.Datum) ctx := context.WithValue(context.Background(), "__mockMetricsTableData", mockData) ctx = failpoint.WithHook(ctx, func(_ context.Context, fpname string) bool { return fpName == fpname }) + rs, err = tk.Exec("CALIBRATE RESOURCE") + require.NoError(t, err) + require.NotNil(t, rs) + err = rs.Next(ctx, rs.NewChunk(nil)) + // because when mock metrics is empty, error is always `pd unavailable`, don't check detail. + require.ErrorContains(t, err, "There is no CPU quota metrics, query metric error: pd unavailable") + + mockData["tikv_cpu_quota"] = [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0), + } + mockData["tidb_server_maxprocs"] = [][]types.Datum{ + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0), + } tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE").Check(testkit.Rows("69768")) tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD TPCC").Check(testkit.Rows("69768")) tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD OLTP_READ_WRITE").Check(testkit.Rows("55823")) @@ -402,7 +408,7 @@ func TestCalibrateResource(t *testing.T) { require.NoError(t, err) require.NotNil(t, rs) err = rs.Next(ctx, rs.NewChunk(nil)) - require.ErrorContains(t, err, "There are too few metrics points available in selected time window") + require.ErrorContains(t, err, "The workload in selected time window is too low") ru3 := [][]types.Datum{ types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0), @@ -442,7 +448,7 @@ func TestCalibrateResource(t *testing.T) { require.NoError(t, err) require.NotNil(t, rs) err = rs.Next(ctx, rs.NewChunk(nil)) - require.ErrorContains(t, err, "There are too few metrics points available in selected time window") + require.ErrorContains(t, err, "The workload in selected time window is too low") // flash back to init data. mockData["resource_manager_resource_unit"] = ru1 @@ -553,7 +559,14 @@ func TestCalibrateResource(t *testing.T) { require.NoError(t, err) require.NotNil(t, rs) err = rs.Next(ctx, rs.NewChunk(nil)) - require.ErrorContains(t, err, "There are too few metrics points available in selected time window") + require.ErrorContains(t, err, "The workload in selected time window is too low") + + delete(mockData, "process_cpu_usage") + rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' END_TIME '2020-02-12 10:45:00'") + require.NoError(t, err) + require.NotNil(t, rs) + err = rs.Next(ctx, rs.NewChunk(nil)) + require.ErrorContains(t, err, "query metric error: pd unavailable") } type mockResourceGroupProvider struct {