-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
executor: support cleanup dangling index data command #6102
Changes from 10 commits
e1da0cc
f42bcc9
25f0efc
54e5a9f
2919e28
8e4925e
1f78194
ee99255
4b34f6f
651b8c8
4a0795c
2dc762a
e70601a
e5d491d
5d426cd
5989bf6
9617686
303bad4
04224eb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ import ( | |
"github.com/pingcap/tidb/statistics" | ||
"github.com/pingcap/tidb/table" | ||
"github.com/pingcap/tidb/table/tables" | ||
"github.com/pingcap/tidb/tablecodec" | ||
"github.com/pingcap/tidb/terror" | ||
"github.com/pingcap/tidb/types" | ||
"github.com/pingcap/tidb/util/chunk" | ||
|
@@ -38,6 +39,7 @@ import ( | |
var ( | ||
_ Executor = &CheckIndexRangeExec{} | ||
_ Executor = &RecoverIndexExec{} | ||
_ Executor = &CleanupIndexExec{} | ||
) | ||
|
||
// CheckIndexRangeExec outputs the index values which has handle between begin and end. | ||
|
@@ -457,3 +459,250 @@ func (e *RecoverIndexExec) NextChunk(ctx context.Context, chk *chunk.Chunk) erro | |
e.done = true | ||
return nil | ||
} | ||
|
||
// CleanupIndexExec represents a cleanup index executor. | ||
// It is built from "admin cleanup index" statement, is used to delete | ||
// dangling index data. | ||
type CleanupIndexExec struct { | ||
baseExecutor | ||
|
||
done bool | ||
removeCnt uint64 | ||
|
||
index table.Index | ||
table table.Table | ||
|
||
idxCols []*model.ColumnInfo | ||
idxColFieldTypes []*types.FieldType | ||
idxChunk *chunk.Chunk | ||
|
||
matchedIndex map[int64]idxData | ||
batchSize uint64 | ||
batchKeys []kv.Key | ||
idxValsBufs [][]types.Datum | ||
lastIdxKey []byte | ||
scanRowCnt uint64 | ||
} | ||
|
||
type idxData struct { | ||
matched bool | ||
idxVals []types.Datum | ||
} | ||
|
||
func (e *CleanupIndexExec) getIdxColTypes() []*types.FieldType { | ||
if e.idxColFieldTypes != nil { | ||
return e.idxColFieldTypes | ||
} | ||
e.idxColFieldTypes = make([]*types.FieldType, 0, len(e.idxCols)) | ||
for _, col := range e.idxCols { | ||
e.idxColFieldTypes = append(e.idxColFieldTypes, &col.FieldType) | ||
} | ||
return e.idxColFieldTypes | ||
} | ||
|
||
func (e *CleanupIndexExec) batchGetRecord(txn kv.Transaction) (map[string][]byte, error) { | ||
for handle := range e.matchedIndex { | ||
e.batchKeys = append(e.batchKeys, e.table.RecordKey(handle)) | ||
} | ||
values, err := txn.GetSnapshot().BatchGet(e.batchKeys) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
return values, nil | ||
} | ||
|
||
func (e *CleanupIndexExec) deleteDanglingIdx(txn kv.Transaction, values map[string][]byte) error { | ||
for _, k := range e.batchKeys { | ||
if _, found := values[string(k)]; !found { | ||
_, handle, err := tablecodec.DecodeRecordKey(k) | ||
if err != nil { | ||
return errors.Trace(err) | ||
} | ||
if err := e.index.Delete(e.ctx.GetSessionVars().StmtCtx, txn, e.matchedIndex[handle].idxVals, | ||
handle); err != nil { | ||
return errors.Trace(err) | ||
} | ||
e.removeCnt++ | ||
if e.removeCnt%e.batchSize == 0 { | ||
log.Infof("[cleaning up dangling index] table: %v, index: %v, count: %v.", | ||
e.table.Meta().Name.String(), e.index.Meta().Name.String(), e.removeCnt) | ||
} | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func (e *CleanupIndexExec) extractIdxVals(row chunk.Row, idxVals []types.Datum) []types.Datum { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function logic is the same as RecoverIndexExec's extractIdxVals. Could we use the same one? |
||
if idxVals == nil { | ||
idxVals = make([]types.Datum, 0, row.Len()-1) | ||
} else { | ||
idxVals = idxVals[:0] | ||
} | ||
|
||
for i := 0; i < row.Len()-1; i++ { | ||
colVal := row.GetDatum(i, e.idxColFieldTypes[i]) | ||
idxVals = append(idxVals, *colVal.Copy()) | ||
} | ||
return idxVals | ||
} | ||
|
||
func (e *CleanupIndexExec) fetchIndex(ctx context.Context, txn kv.Transaction) error { | ||
result, err := e.buildIndexScan(ctx, txn) | ||
if err != nil { | ||
return errors.Trace(err) | ||
} | ||
defer terror.Call(result.Close) | ||
|
||
sc := e.ctx.GetSessionVars().StmtCtx | ||
for { | ||
err := result.NextChunk(ctx, e.idxChunk) | ||
if err != nil { | ||
return errors.Trace(err) | ||
} | ||
if e.idxChunk.NumRows() == 0 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should check the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. friendly ping @jackysp Please resolve reviewer's comment |
||
return nil | ||
} | ||
iter := chunk.NewIterator4Chunk(e.idxChunk) | ||
for row := iter.Begin(); row != iter.End(); row = iter.Next() { | ||
handle := row.GetInt64(len(e.idxCols) - 1) | ||
idxVals := e.extractIdxVals(row, e.idxValsBufs[e.scanRowCnt]) | ||
e.idxValsBufs[e.scanRowCnt] = idxVals | ||
e.matchedIndex[handle] = idxData{false, idxVals} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we remove the field of |
||
idxKey, _, err := e.index.GenIndexKey(sc, idxVals, handle, nil) | ||
if err != nil { | ||
return errors.Trace(err) | ||
} | ||
e.scanRowCnt++ | ||
e.lastIdxKey = idxKey | ||
if e.scanRowCnt >= e.batchSize { | ||
return nil | ||
} | ||
} | ||
} | ||
} | ||
|
||
// NextChunk implements the Executor NextChunk interface. | ||
func (e *CleanupIndexExec) NextChunk(ctx context.Context, chk *chunk.Chunk) error { | ||
chk.Reset() | ||
if e.done { | ||
return nil | ||
} | ||
count := 0 | ||
for { | ||
count++ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
errInTxn := kv.RunInNewTxn(e.ctx.GetStore(), true, func(txn kv.Transaction) error { | ||
err := e.fetchIndex(ctx, txn) | ||
if err != nil { | ||
return errors.Trace(err) | ||
} | ||
values, err := e.batchGetRecord(txn) | ||
if err != nil { | ||
return errors.Trace(err) | ||
} | ||
err = e.deleteDanglingIdx(txn, values) | ||
if err != nil { | ||
return errors.Trace(err) | ||
} | ||
return nil | ||
}) | ||
if errInTxn != nil { | ||
return errors.Trace(errInTxn) | ||
} | ||
if e.scanRowCnt == 0 { | ||
break | ||
} | ||
e.scanRowCnt = 0 | ||
e.batchKeys = e.batchKeys[:0] | ||
for k := range e.matchedIndex { | ||
delete(e.matchedIndex, k) | ||
} | ||
} | ||
e.done = true | ||
chk.AppendUint64(0, e.removeCnt) | ||
return nil | ||
} | ||
|
||
func (e *CleanupIndexExec) buildIndexScan(ctx context.Context, txn kv.Transaction) (distsql.SelectResult, error) { | ||
dagPB, err := e.buildIdxDAGPB(txn) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
sc := e.ctx.GetSessionVars().StmtCtx | ||
var builder distsql.RequestBuilder | ||
ranges := ranger.FullNewRange() | ||
kvReq, err := builder.SetIndexRanges(sc, e.table.Meta().ID, e.index.Meta().ID, ranges). | ||
SetDAGRequest(dagPB). | ||
SetKeepOrder(true). | ||
SetFromSessionVars(e.ctx.GetSessionVars()). | ||
Build() | ||
kvReq.KeyRanges[0].StartKey = kv.Key(e.lastIdxKey).PrefixNext() | ||
kvReq.Concurrency = 1 | ||
result, err := distsql.Select(ctx, e.ctx, kvReq, e.getIdxColTypes(), statistics.NewQueryFeedback(0, nil, 0, false)) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
result.Fetch(ctx) | ||
return result, nil | ||
} | ||
|
||
// Open implements the Executor Open interface. | ||
func (e *CleanupIndexExec) Open(ctx context.Context) error { | ||
if err := e.baseExecutor.Open(ctx); err != nil { | ||
return errors.Trace(err) | ||
} | ||
e.idxChunk = chunk.NewChunk(e.getIdxColTypes()) | ||
e.matchedIndex = make(map[int64]idxData, e.batchSize) | ||
e.batchKeys = make([]kv.Key, 0, e.batchSize) | ||
e.idxValsBufs = make([][]types.Datum, e.batchSize) | ||
sc := e.ctx.GetSessionVars().StmtCtx | ||
idxKey, _, err := e.index.GenIndexKey(sc, []types.Datum{{}}, math.MinInt64, nil) | ||
if err != nil { | ||
return errors.Trace(err) | ||
} | ||
e.lastIdxKey = idxKey | ||
return nil | ||
} | ||
|
||
func (e *CleanupIndexExec) buildIdxDAGPB(txn kv.Transaction) (*tipb.DAGRequest, error) { | ||
dagReq := &tipb.DAGRequest{} | ||
dagReq.StartTs = txn.StartTS() | ||
dagReq.TimeZoneOffset = timeZoneOffset(e.ctx) | ||
sc := e.ctx.GetSessionVars().StmtCtx | ||
dagReq.Flags = statementContextToFlags(sc) | ||
for i := range e.idxCols { | ||
dagReq.OutputOffsets = append(dagReq.OutputOffsets, uint32(i)) | ||
} | ||
|
||
execPB := e.constructIndexScanPB() | ||
dagReq.Executors = append(dagReq.Executors, execPB) | ||
err := plan.SetPBColumnsDefaultValue(e.ctx, dagReq.Executors[0].IdxScan.Columns, e.idxCols) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can these codes be extracted into a function? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They are looked similar, but not exactly the same. It needs a refactor. Maybe in the next pr. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's the same as |
||
|
||
limitExec := e.constructLimitPB() | ||
dagReq.Executors = append(dagReq.Executors, limitExec) | ||
|
||
return dagReq, nil | ||
} | ||
|
||
func (e *CleanupIndexExec) constructIndexScanPB() *tipb.Executor { | ||
idxExec := &tipb.IndexScan{ | ||
TableId: e.table.Meta().ID, | ||
IndexId: e.index.Meta().ID, | ||
Columns: plan.ColumnsToProto(e.idxCols, e.table.Meta().PKIsHandle), | ||
} | ||
return &tipb.Executor{Tp: tipb.ExecType_TypeIndexScan, IdxScan: idxExec} | ||
} | ||
|
||
func (e *CleanupIndexExec) constructLimitPB() *tipb.Executor { | ||
limitExec := &tipb.Limit{ | ||
Limit: e.batchSize, | ||
} | ||
return &tipb.Executor{Tp: tipb.ExecType_TypeLimit, Limit: limitExec} | ||
} | ||
|
||
// Close implements the Executor Close interface. | ||
func (e *CleanupIndexExec) Close() error { | ||
return nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems we don't use this field.