diff --git a/ddl/column.go b/ddl/column.go index e2e66773b138d..046fb4bd9d205 100644 --- a/ddl/column.go +++ b/ddl/column.go @@ -14,6 +14,8 @@ package ddl import ( + "time" + "github.com/juju/errors" "github.com/ngaut/log" "github.com/pingcap/tidb/ast" @@ -307,6 +309,7 @@ func (d *ddl) backfillColumn(t table.Table, columnInfo *model.ColumnInfo, reorgI count := 0 for { + startTS := time.Now() handles, err := d.getSnapshotRows(t, version, seekHandle) if err != nil { return errors.Trace(err) @@ -314,14 +317,17 @@ func (d *ddl) backfillColumn(t table.Table, columnInfo *model.ColumnInfo, reorgI return nil } + count += len(handles) seekHandle = handles[len(handles)-1] + 1 + sub := time.Since(startTS).Seconds() err = d.backfillColumnData(t, columnInfo, handles, reorgInfo) if err != nil { + log.Warnf("[ddl] added column for %v rows failed, take time %v", count, sub) return errors.Trace(err) } - count += len(handles) - log.Infof("[ddl] added column for %v rows", count) + batchHandleDataHistogram.WithLabelValues(batchAddCol).Observe(sub) + log.Infof("[ddl] added column for %v rows, take time %v", count, sub) } } diff --git a/ddl/ddl_worker.go b/ddl/ddl_worker.go index 03ec667068ac8..9ca8d014279f7 100644 --- a/ddl/ddl_worker.go +++ b/ddl/ddl_worker.go @@ -43,7 +43,6 @@ func (d *ddl) doDDLJob(ctx context.Context, job *model.Job) error { err = t.EnQueueDDLJob(job) return errors.Trace(err) }) - if err != nil { return errors.Trace(err) } @@ -53,14 +52,23 @@ func (d *ddl) doDDLJob(ctx context.Context, job *model.Job) error { log.Warnf("[ddl] start DDL job %v", job) - jobID := job.ID - var historyJob *model.Job - + jobID := job.ID // for a job from start to end, the state of it will be none -> delete only -> write only -> reorganization -> public // for every state changes, we will wait as lease 2 * lease time, so here the ticker check is 10 * lease. ticker := time.NewTicker(chooseLeaseTime(10*d.lease, 10*time.Second)) - defer ticker.Stop() + startTS := time.Now() + jobsGauge.WithLabelValues(JobType(ddlJobFlag).String(), job.Type.String()).Inc() + defer func() { + ticker.Stop() + jobsGauge.WithLabelValues(JobType(ddlJobFlag).String(), job.Type.String()).Desc() + retLabel := handleJobSucc + if err != nil { + retLabel = handleJobFailed + } + handleJobHistogram.WithLabelValues(JobType(ddlJobFlag).String(), job.Type.String(), + retLabel).Observe(time.Since(startTS).Seconds()) + }() for { select { case <-d.ddlJobDoneCh: diff --git a/ddl/index.go b/ddl/index.go index 2cb8cb4da149b..47d959b91b2e9 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -14,6 +14,8 @@ package ddl import ( + "time" + "github.com/juju/errors" "github.com/ngaut/log" "github.com/pingcap/tidb/ast" @@ -350,6 +352,7 @@ func (d *ddl) addTableIndex(t table.Table, indexInfo *model.IndexInfo, reorgInfo count := 0 for { + startTS := time.Now() handles, err := d.getSnapshotRows(t, version, seekHandle) if err != nil { return errors.Trace(err) @@ -357,14 +360,18 @@ func (d *ddl) addTableIndex(t table.Table, indexInfo *model.IndexInfo, reorgInfo return nil } + count += len(handles) seekHandle = handles[len(handles)-1] + 1 err = d.backfillTableIndex(t, indexInfo, handles, reorgInfo) + sub := time.Since(startTS).Seconds() if err != nil { + log.Warnf("[ddl] added index for %v rows failed, take time %v", count, sub) return errors.Trace(err) } - count += len(handles) - log.Infof("[ddl] added index for %v rows", count) + batchHandleDataHistogram.WithLabelValues(batchAddIdx).Observe(sub) + log.Infof("[ddl] added index for %v rows, take time %v", count, sub) + } } diff --git a/ddl/metrics.go b/ddl/metrics.go new file mode 100644 index 0000000000000..e6919451e6453 --- /dev/null +++ b/ddl/metrics.go @@ -0,0 +1,57 @@ +// Copyright 2016 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ddl + +import "github.com/prometheus/client_golang/prometheus" + +var ( + jobsGauge = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "tidb", + Subsystem: "ddl", + Name: "waiting_jobs", + Help: "Gauge of jobs.", + }, []string{"type", "action"}) + + // handle job result state. + handleJobSucc = "handle_job_succ" + handleJobFailed = "handle_job_failed" + handleJobHistogram = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "tidb", + Subsystem: "ddl", + Name: "handle_job_duration_seconds", + Help: "Bucketed histogram of processing time (s) of handle jobs", + Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13), + }, []string{"type", "action", "result_state"}) + + // handle batch data type. + batchAddCol = "batch_add_col" + batchAddIdx = "batch_add_idx" + batchDelData = "batch_del_data" + batchHandleDataHistogram = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "tidb", + Subsystem: "ddl", + Name: "batch_add_or_del_data_succ", + Help: "Bucketed histogram of processing time (s) of batch handle data", + Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13), + }, []string{"handle_data_type"}) +) + +func init() { + prometheus.MustRegister(jobsGauge) + prometheus.MustRegister(handleJobHistogram) + prometheus.MustRegister(batchHandleDataHistogram) +} diff --git a/ddl/reorg.go b/ddl/reorg.go index cb315217eea67..7cdf075d1ccad 100644 --- a/ddl/reorg.go +++ b/ddl/reorg.go @@ -173,6 +173,7 @@ func (d *ddl) delKeysWithPrefix(prefix kv.Key, jobType JobType) error { count := 0 for { + startTS := time.Now() keys := make([]kv.Key, 0, maxBatchSize) err := kv.RunInNewTxn(d.store, true, func(txn kv.Transaction) error { if err1 := d.isReorgRunnable(txn, jobType); err1 != nil { @@ -209,12 +210,14 @@ func (d *ddl) delKeysWithPrefix(prefix kv.Key, jobType JobType) error { count += len(keys) return nil }) - + sub := time.Since(startTS).Seconds() if err != nil { + log.Warnf("[ddl] deleted %v keys with prefix %q failed, take time %v", count, prefix, sub) return errors.Trace(err) } - log.Infof("[ddl] deleted %v keys with prefix %q", count, prefix) + batchHandleDataHistogram.WithLabelValues(batchDelData).Observe(sub) + log.Infof("[ddl] deleted %v keys with prefix %q take time %v", count, prefix, sub) // delete no keys, return. if len(keys) == 0 { return nil