From 636c0f565210577605d7a9bc96a808385a99c983 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 8 Dec 2021 15:55:56 +0800 Subject: [PATCH] *: fix changefeed checkpoint lag negative value error (#3013) (#3535) --- cdc/capture/capture.go | 36 +++++--- cdc/owner/changefeed.go | 13 ++- cdc/owner/changefeed_test.go | 2 + cdc/processor/manager.go | 1 + cdc/processor/processor.go | 29 ++++--- pkg/context/context.go | 14 +-- pkg/orchestrator/etcd_worker_test.go | 6 +- pkg/pdtime/acquirer.go | 125 +++++++++++++++++++++++++++ pkg/pdtime/acquirer_test.go | 60 +++++++++++++ pkg/txnutil/gc/gc_manager.go | 33 +++---- pkg/txnutil/gc/gc_manager_test.go | 45 ++++------ 11 files changed, 271 insertions(+), 93 deletions(-) create mode 100644 pkg/pdtime/acquirer.go create mode 100644 pkg/pdtime/acquirer_test.go diff --git a/cdc/capture/capture.go b/cdc/capture/capture.go index c6a174ee6c6..46e580c62e2 100644 --- a/cdc/capture/capture.go +++ b/cdc/capture/capture.go @@ -32,6 +32,7 @@ import ( cdcContext "github.com/pingcap/ticdc/pkg/context" cerror "github.com/pingcap/ticdc/pkg/errors" "github.com/pingcap/ticdc/pkg/orchestrator" + "github.com/pingcap/ticdc/pkg/pdtime" "github.com/pingcap/ticdc/pkg/version" tidbkv "github.com/pingcap/tidb/kv" pd "github.com/tikv/pd/client" @@ -54,12 +55,12 @@ type Capture struct { session *concurrency.Session election *concurrency.Election - pdClient pd.Client - kvStorage tidbkv.Storage - etcdClient *kv.CDCEtcdClient - grpcPool kv.GrpcPool - - cancel context.CancelFunc + pdClient pd.Client + kvStorage tidbkv.Storage + etcdClient *kv.CDCEtcdClient + grpcPool kv.GrpcPool + TimeAcquirer pdtime.TimeAcquirer + cancel context.CancelFunc newProcessorManager func() *processor.Manager newOwner func(pd.Client) *owner.Owner @@ -99,6 +100,12 @@ func (c *Capture) reset(ctx context.Context) error { } c.session = sess c.election = concurrency.NewElection(sess, kv.CaptureOwnerKey) + + if c.TimeAcquirer != nil { + c.TimeAcquirer.Stop() + } + c.TimeAcquirer = pdtime.NewTimeAcquirer(c.pdClient) + if c.grpcPool != nil { c.grpcPool.Close() } @@ -147,11 +154,12 @@ func (c *Capture) Run(ctx context.Context) error { func (c *Capture) run(stdCtx context.Context) error { ctx := cdcContext.NewContext(stdCtx, &cdcContext.GlobalVars{ - PDClient: c.pdClient, - KVStorage: c.kvStorage, - CaptureInfo: c.info, - EtcdClient: c.etcdClient, - GrpcPool: c.grpcPool, + PDClient: c.pdClient, + KVStorage: c.kvStorage, + CaptureInfo: c.info, + EtcdClient: c.etcdClient, + GrpcPool: c.grpcPool, + TimeAcquirer: c.TimeAcquirer, }) err := c.register(ctx) if err != nil { @@ -165,7 +173,7 @@ func (c *Capture) run(stdCtx context.Context) error { cancel() }() wg := new(sync.WaitGroup) - wg.Add(3) + wg.Add(4) var ownerErr, processorErr error go func() { defer wg.Done() @@ -187,6 +195,10 @@ func (c *Capture) run(stdCtx context.Context) error { processorErr = c.runEtcdWorker(ctx, c.processorManager, model.NewGlobalState(), processorFlushInterval) log.Info("the processor routine has exited", zap.Error(processorErr)) }() + go func() { + defer wg.Done() + c.TimeAcquirer.Run(ctx) + }() go func() { defer wg.Done() c.grpcPool.RecycleConn(ctx) diff --git a/cdc/owner/changefeed.go b/cdc/owner/changefeed.go index c6670f5098c..a4c1355e0ab 100644 --- a/cdc/owner/changefeed.go +++ b/cdc/owner/changefeed.go @@ -16,7 +16,6 @@ package owner import ( "context" "sync" - "time" "github.com/pingcap/errors" "github.com/pingcap/failpoint" @@ -178,7 +177,9 @@ func (c *changefeed) tick(ctx cdcContext.Context, state *model.ChangefeedReactor return errors.Trace(err) } if shouldUpdateState { - c.updateStatus(barrierTs) + pdTime, _ := ctx.GlobalVars().TimeAcquirer.CurrentTimeFromCached() + currentTs := oracle.GetPhysical(pdTime) + c.updateStatus(currentTs, barrierTs) } return nil } @@ -438,7 +439,7 @@ func (c *changefeed) asyncExecDDL(ctx cdcContext.Context, job *timodel.Job) (don return done, nil } -func (c *changefeed) updateStatus(barrierTs model.Ts) { +func (c *changefeed) updateStatus(currentTs int64, barrierTs model.Ts) { resolvedTs := barrierTs for _, position := range c.state.TaskPositions { if resolvedTs > position.ResolvedTs { @@ -470,12 +471,10 @@ func (c *changefeed) updateStatus(barrierTs model.Ts) { } return status, changed, nil }) - phyTs := oracle.ExtractPhysical(checkpointTs) + c.metricsChangefeedCheckpointTsGauge.Set(float64(phyTs)) - // It is more accurate to get tso from PD, but in most cases since we have - // deployed NTP service, a little bias is acceptable here. - c.metricsChangefeedCheckpointTsLagGauge.Set(float64(oracle.GetPhysical(time.Now())-phyTs) / 1e3) + c.metricsChangefeedCheckpointTsLagGauge.Set(float64(currentTs-phyTs) / 1e3) } func (c *changefeed) Close() { diff --git a/cdc/owner/changefeed_test.go b/cdc/owner/changefeed_test.go index 4d3e1cd3733..ec255aabd0f 100644 --- a/cdc/owner/changefeed_test.go +++ b/cdc/owner/changefeed_test.go @@ -26,6 +26,7 @@ import ( "github.com/pingcap/ticdc/pkg/config" cdcContext "github.com/pingcap/ticdc/pkg/context" "github.com/pingcap/ticdc/pkg/orchestrator" + "github.com/pingcap/ticdc/pkg/pdtime" "github.com/pingcap/ticdc/pkg/txnutil/gc" "github.com/pingcap/ticdc/pkg/util/testleak" "github.com/pingcap/ticdc/pkg/version" @@ -217,6 +218,7 @@ func (s *changefeedSuite) TestExecDDL(c *check.C) { AdvertiseAddr: "127.0.0.1:0000", Version: version.ReleaseVersion, }, + TimeAcquirer: pdtime.NewTimeAcquirer4Test(), }) ctx = cdcContext.WithChangefeedVars(ctx, &cdcContext.ChangefeedVars{ ID: "changefeed-id-test", diff --git a/cdc/processor/manager.go b/cdc/processor/manager.go index d1bf39e7854..29c4f6e737d 100644 --- a/cdc/processor/manager.go +++ b/cdc/processor/manager.go @@ -70,6 +70,7 @@ func (m *Manager) Tick(stdCtx context.Context, state orchestrator.ReactorState) if err := m.handleCommand(); err != nil { return state, err } + captureID := ctx.GlobalVars().CaptureInfo.ID var inactiveChangefeedCount int for changefeedID, changefeedState := range globalState.Changefeeds { diff --git a/cdc/processor/processor.go b/cdc/processor/processor.go index a69bb16502f..06fdfa2a16c 100644 --- a/cdc/processor/processor.go +++ b/cdc/processor/processor.go @@ -20,7 +20,6 @@ import ( "math" "strconv" "sync" - "time" "github.com/pingcap/errors" "github.com/pingcap/failpoint" @@ -154,7 +153,8 @@ func (p *processor) tick(ctx cdcContext.Context, state *model.ChangefeedReactorS if !p.checkChangefeedNormal() { return nil, cerror.ErrAdminStopProcessor.GenWithStackByArgs() } - if skip := p.checkPosition(); skip { + // we should skip this tick after create a task position + if p.createTaskPosition() { return p.changefeed, nil } if err := p.handleErrorCh(ctx); err != nil { @@ -169,7 +169,12 @@ func (p *processor) tick(ctx cdcContext.Context, state *model.ChangefeedReactorS if err := p.checkTablesNum(ctx); err != nil { return nil, errors.Trace(err) } - p.handlePosition() + + // it is no need to check the err here, because we will use + // local time when an error return, which is acceptable + pdTime, _ := ctx.GlobalVars().TimeAcquirer.CurrentTimeFromCached() + + p.handlePosition(oracle.GetPhysical(pdTime)) p.pushResolvedTs2Table() p.handleWorkload() p.doGCSchemaStorage(ctx) @@ -187,10 +192,10 @@ func (p *processor) checkChangefeedNormal() bool { return true } -// checkPosition create a new task position, and put it into the etcd state. -// task position maybe be not exist only when the processor is running first time. -func (p *processor) checkPosition() (skipThisTick bool) { - if p.changefeed.TaskPositions[p.captureInfo.ID] != nil { +// createTaskPosition will create a new task position if a task position does not exist. +// task position not exist only when the processor is running first in the first tick. +func (p *processor) createTaskPosition() (skipThisTick bool) { + if _, exist := p.changefeed.TaskPositions[p.captureInfo.ID]; exist { return false } if p.initialized { @@ -552,7 +557,7 @@ func (p *processor) checkTablesNum(ctx cdcContext.Context) error { } // handlePosition calculates the local resolved ts and local checkpoint ts -func (p *processor) handlePosition() { +func (p *processor) handlePosition(currentTs int64) { minResolvedTs := uint64(math.MaxUint64) if p.schemaStorage != nil { minResolvedTs = p.schemaStorage.ResolvedTs() @@ -573,15 +578,11 @@ func (p *processor) handlePosition() { } resolvedPhyTs := oracle.ExtractPhysical(minResolvedTs) - // It is more accurate to get tso from PD, but in most cases we have - // deployed NTP service, a little bias is acceptable here. - p.metricResolvedTsLagGauge.Set(float64(oracle.GetPhysical(time.Now())-resolvedPhyTs) / 1e3) + p.metricResolvedTsLagGauge.Set(float64(currentTs-resolvedPhyTs) / 1e3) p.metricResolvedTsGauge.Set(float64(resolvedPhyTs)) checkpointPhyTs := oracle.ExtractPhysical(minCheckpointTs) - // It is more accurate to get tso from PD, but in most cases we have - // deployed NTP service, a little bias is acceptable here. - p.metricCheckpointTsLagGauge.Set(float64(oracle.GetPhysical(time.Now())-checkpointPhyTs) / 1e3) + p.metricCheckpointTsLagGauge.Set(float64(currentTs-checkpointPhyTs) / 1e3) p.metricCheckpointTsGauge.Set(float64(checkpointPhyTs)) // minResolvedTs and minCheckpointTs may less than global resolved ts and global checkpoint ts when a new table added, the startTs of the new table is less than global checkpoint ts. diff --git a/pkg/context/context.go b/pkg/context/context.go index 21f99ffbeba..9449638e4a0 100644 --- a/pkg/context/context.go +++ b/pkg/context/context.go @@ -18,6 +18,8 @@ import ( "log" "time" + "github.com/pingcap/ticdc/pkg/pdtime" + "github.com/pingcap/ticdc/pkg/version" "github.com/pingcap/ticdc/cdc/kv" @@ -33,11 +35,12 @@ import ( // the lifecycle of vars in the GlobalVars should be aligned with the ticdc server process. // All field in Vars should be READ-ONLY and THREAD-SAFE type GlobalVars struct { - PDClient pd.Client - KVStorage tidbkv.Storage - CaptureInfo *model.CaptureInfo - EtcdClient *kv.CDCEtcdClient - GrpcPool kv.GrpcPool + PDClient pd.Client + KVStorage tidbkv.Storage + CaptureInfo *model.CaptureInfo + EtcdClient *kv.CDCEtcdClient + GrpcPool kv.GrpcPool + TimeAcquirer pdtime.TimeAcquirer } // ChangefeedVars contains some vars which can be used anywhere in a pipeline @@ -184,6 +187,7 @@ func NewBackendContext4Test(withChangefeedVars bool) Context { AdvertiseAddr: "127.0.0.1:0000", Version: version.ReleaseVersion, }, + TimeAcquirer: pdtime.NewTimeAcquirer4Test(), }) if withChangefeedVars { ctx = WithChangefeedVars(ctx, &ChangefeedVars{ diff --git a/pkg/orchestrator/etcd_worker_test.go b/pkg/orchestrator/etcd_worker_test.go index db032e1e2ce..661f93751bf 100644 --- a/pkg/orchestrator/etcd_worker_test.go +++ b/pkg/orchestrator/etcd_worker_test.go @@ -18,6 +18,7 @@ import ( "encoding/json" "regexp" "strconv" + "strings" "sync" "testing" "time" @@ -224,7 +225,6 @@ func (s *etcdWorkerSuite) TestEtcdSum(c *check.C) { defer func() { _ = cli.Unwrap().Close() }() - _, err := cli.Put(ctx, testEtcdKeyPrefix+"/sum", "0") c.Check(err, check.IsNil) @@ -273,7 +273,9 @@ func (s *etcdWorkerSuite) TestEtcdSum(c *check.C) { } err = errg.Wait() - if err != nil && (errors.Cause(err) == context.DeadlineExceeded || errors.Cause(err) == context.Canceled) { + if err != nil && (errors.Cause(err) == context.DeadlineExceeded || + errors.Cause(err) == context.Canceled || + strings.Contains(err.Error(), "etcdserver: request timeout")) { return } c.Check(err, check.IsNil) diff --git a/pkg/pdtime/acquirer.go b/pkg/pdtime/acquirer.go new file mode 100644 index 00000000000..f232b56b027 --- /dev/null +++ b/pkg/pdtime/acquirer.go @@ -0,0 +1,125 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package pdtime + +import ( + "context" + "sync" + "time" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/ticdc/pkg/retry" + "github.com/tikv/client-go/v2/oracle" + pd "github.com/tikv/pd/client" + "go.uber.org/zap" +) + +const pdTimeUpdateInterval = 200 * time.Millisecond + +// TimeAcquirer cache time get from PD periodically +type TimeAcquirer interface { + // Run run the TimeAcquirer + Run(ctx context.Context) + // CurrentTimeFromCached returns current time from cache + CurrentTimeFromCached() (time.Time, error) + // Stop stops the TimeAcquirer + Stop() +} + +// TimeAcquirerImpl cache time get from PD periodically and cache it +type TimeAcquirerImpl struct { + pdClient pd.Client + timeCache time.Time + mu sync.RWMutex + cancel context.CancelFunc + err error +} + +// NewTimeAcquirer return a new TimeAcquirer +func NewTimeAcquirer(pdClient pd.Client) TimeAcquirer { + return &TimeAcquirerImpl{ + pdClient: pdClient, + } +} + +// Run will get time from pd periodically to cache in pdPhysicalTimeCache +func (c *TimeAcquirerImpl) Run(ctx context.Context) { + ctx, cancel := context.WithCancel(ctx) + c.cancel = cancel + ticker := time.NewTicker(pdTimeUpdateInterval) + for { + select { + // c.Stop() was called or parent ctx was canceled + case <-ctx.Done(): + log.Info("TimeAcquirer exit") + return + case <-ticker.C: + err := retry.Do(ctx, func() error { + physical, _, err := c.pdClient.GetTS(ctx) + if err != nil { + log.Info("get time from pd failed, retry later", zap.Error(err)) + return err + } + c.mu.Lock() + c.timeCache = oracle.GetTimeFromTS(oracle.ComposeTS(physical, 0)) + c.err = nil + c.mu.Unlock() + return nil + }, retry.WithBackoffBaseDelay(200), retry.WithMaxTries(10)) + if err != nil { + log.Warn("get time from pd failed, will use local time as pd time") + c.mu.Lock() + c.timeCache = time.Now() + c.err = err + c.mu.Unlock() + } + } + } +} + +// CurrentTimeFromCached return current time from pd cache +func (c *TimeAcquirerImpl) CurrentTimeFromCached() (time.Time, error) { + c.mu.RLock() + err := c.err + cacheTime := c.timeCache + c.mu.RUnlock() + return cacheTime, errors.Trace(err) +} + +// Stop stop TimeAcquirer +func (c *TimeAcquirerImpl) Stop() { + c.cancel() +} + +// TimeAcquirer4Test only for test +type TimeAcquirer4Test struct{} + +// NewTimeAcquirer4Test return a TimeAcquirer for test +func NewTimeAcquirer4Test() TimeAcquirer { + return &TimeAcquirer4Test{} +} + +// CurrentTimeFromCached return current time +func (c *TimeAcquirer4Test) CurrentTimeFromCached() (time.Time, error) { + return time.Now(), nil +} + +// Run implements TimeAcquirer +func (c *TimeAcquirer4Test) Run(ctx context.Context) { +} + +// Stop implements TimeAcquirer +func (c *TimeAcquirer4Test) Stop() { +} diff --git a/pkg/pdtime/acquirer_test.go b/pkg/pdtime/acquirer_test.go new file mode 100644 index 00000000000..92ab915b78e --- /dev/null +++ b/pkg/pdtime/acquirer_test.go @@ -0,0 +1,60 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package pdtime + +import ( + "context" + "testing" + "time" + + "github.com/pingcap/check" + "github.com/pingcap/ticdc/pkg/util/testleak" + "github.com/tikv/client-go/v2/oracle" + pd "github.com/tikv/pd/client" +) + +func TestSuite(t *testing.T) { check.TestingT(t) } + +type pdTimeSuite struct{} + +var _ = check.Suite(&pdTimeSuite{}) + +// MockPDClient mocks pd.Client to facilitate unit testing. +type MockPDClient struct { + pd.Client +} + +// GetTS implements pd.Client.GetTS. +func (m *MockPDClient) GetTS(ctx context.Context) (int64, int64, error) { + return oracle.GetPhysical(time.Now()), 0, nil +} + +func (s *pdTimeSuite) TestTimeFromPD(c *check.C) { + defer testleak.AfterTest(c)() + mockPDClient := &MockPDClient{} + TimeAcquirer := NewTimeAcquirer(mockPDClient) + go TimeAcquirer.Run(context.Background()) + defer TimeAcquirer.Stop() + time.Sleep(1 * time.Second) + + t1, err := TimeAcquirer.CurrentTimeFromCached() + c.Assert(err, check.IsNil) + + time.Sleep(400 * time.Millisecond) + // assume that the gc safe point updated one hour ago + t2, err := TimeAcquirer.CurrentTimeFromCached() + c.Assert(err, check.IsNil) + // should return new time + c.Assert(t1, check.Less, t2) +} diff --git a/pkg/txnutil/gc/gc_manager.go b/pkg/txnutil/gc/gc_manager.go index df997487dbf..1a158fa6f9d 100644 --- a/pkg/txnutil/gc/gc_manager.go +++ b/pkg/txnutil/gc/gc_manager.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/log" "github.com/pingcap/ticdc/cdc/model" "github.com/pingcap/ticdc/pkg/config" + cdcContext "github.com/pingcap/ticdc/pkg/context" cerror "github.com/pingcap/ticdc/pkg/errors" "github.com/tikv/client-go/v2/oracle" pd "github.com/tikv/pd/client" @@ -31,7 +32,6 @@ import ( const ( // CDCServiceSafePointID is the ID of CDC service in pd.UpdateServiceGCSafePoint. CDCServiceSafePointID = "ticdc" - pdTimeUpdateInterval = 10 * time.Minute ) // gcSafepointUpdateInterval is the minimum interval that CDC can update gc safepoint @@ -43,32 +43,27 @@ type Manager interface { // Manager may skip update when it thinks it is too frequent. // Set `forceUpdate` to force Manager update. TryUpdateGCSafePoint(ctx context.Context, checkpointTs model.Ts, forceUpdate bool) error - CurrentTimeFromPDCached(ctx context.Context) (time.Time, error) CheckStaleCheckpointTs(ctx context.Context, changefeedID model.ChangeFeedID, checkpointTs model.Ts) error } type gcManager struct { pdClient pd.Client - - gcTTL int64 + gcTTL int64 lastUpdatedTime time.Time lastSucceededTime time.Time lastSafePointTs uint64 isTiCDCBlockGC bool - - pdPhysicalTimeCache time.Time - lastUpdatedPdTime time.Time } // NewManager creates a new Manager. -func NewManager(pdClint pd.Client) Manager { +func NewManager(pdClient pd.Client) Manager { serverConfig := config.GetGlobalServerConfig() failpoint.Inject("InjectGcSafepointUpdateInterval", func(val failpoint.Value) { gcSafepointUpdateInterval = time.Duration(val.(int) * int(time.Millisecond)) }) return &gcManager{ - pdClient: pdClint, + pdClient: pdClient, lastSucceededTime: time.Now(), gcTTL: serverConfig.GcTTL, } @@ -111,25 +106,17 @@ func (m *gcManager) TryUpdateGCSafePoint( return nil } -func (m *gcManager) CurrentTimeFromPDCached(ctx context.Context) (time.Time, error) { - if time.Since(m.lastUpdatedPdTime) <= pdTimeUpdateInterval { - return m.pdPhysicalTimeCache, nil - } - physical, logical, err := m.pdClient.GetTS(ctx) - if err != nil { - return time.Now(), errors.Trace(err) - } - m.pdPhysicalTimeCache = oracle.GetTimeFromTS(oracle.ComposeTS(physical, logical)) - m.lastUpdatedPdTime = time.Now() - return m.pdPhysicalTimeCache, nil -} - func (m *gcManager) CheckStaleCheckpointTs( ctx context.Context, changefeedID model.ChangeFeedID, checkpointTs model.Ts, ) error { gcSafepointUpperBound := checkpointTs - 1 if m.isTiCDCBlockGC { - pdTime, err := m.CurrentTimeFromPDCached(ctx) + cctx, ok := ctx.(cdcContext.Context) + if !ok { + return cerror.ErrOwnerUnknown.GenWithStack("ctx not an cdcContext.Context, it should be") + } + pdTime, err := cctx.GlobalVars().TimeAcquirer.CurrentTimeFromCached() + // TODO: should we return err here, or just log it? if err != nil { return errors.Trace(err) } diff --git a/pkg/txnutil/gc/gc_manager_test.go b/pkg/txnutil/gc/gc_manager_test.go index 9ac9465b490..314631c3868 100644 --- a/pkg/txnutil/gc/gc_manager_test.go +++ b/pkg/txnutil/gc/gc_manager_test.go @@ -18,6 +18,8 @@ import ( "testing" "time" + "github.com/pingcap/ticdc/pkg/pdtime" + "github.com/pingcap/check" "github.com/pingcap/errors" cdcContext "github.com/pingcap/ticdc/pkg/context" @@ -88,49 +90,32 @@ func (s *gcManagerSuite) TestUpdateGCSafePoint(c *check.C) { } } -func (s *gcManagerSuite) TestTimeFromPD(c *check.C) { - defer testleak.AfterTest(c)() - mockPDClient := &MockPDClient{} - gcManager := NewManager(mockPDClient).(*gcManager) - ctx := cdcContext.NewBackendContext4Test(true) - ctx.GlobalVars().PDClient = mockPDClient - t1, err := gcManager.CurrentTimeFromPDCached(ctx) - c.Assert(err, check.IsNil) - c.Assert(t1, check.Equals, gcManager.pdPhysicalTimeCache) - - time.Sleep(50 * time.Millisecond) - // should return cached time - t2, err := gcManager.CurrentTimeFromPDCached(ctx) - c.Assert(err, check.IsNil) - c.Assert(t2, check.Equals, gcManager.pdPhysicalTimeCache) - c.Assert(t2, check.Equals, t1) - - time.Sleep(50 * time.Millisecond) - // assume that the gc safe point updated one hour ago - gcManager.lastUpdatedPdTime = time.Now().Add(-time.Hour) - t3, err := gcManager.CurrentTimeFromPDCached(ctx) - c.Assert(err, check.IsNil) - c.Assert(t3, check.Equals, gcManager.pdPhysicalTimeCache) - // should return new time - c.Assert(t3, check.Not(check.Equals), t2) -} - func (s *gcManagerSuite) TestCheckStaleCheckpointTs(c *check.C) { defer testleak.AfterTest(c)() mockPDClient := &MockPDClient{} gcManager := NewManager(mockPDClient).(*gcManager) gcManager.isTiCDCBlockGC = true ctx := context.Background() - err := gcManager.CheckStaleCheckpointTs(ctx, "cfID", 10) + + TimeAcquirer := pdtime.NewTimeAcquirer(mockPDClient) + go TimeAcquirer.Run(ctx) + time.Sleep(1 * time.Second) + defer TimeAcquirer.Stop() + + cCtx := cdcContext.NewContext(ctx, &cdcContext.GlobalVars{ + TimeAcquirer: TimeAcquirer, + }) + + err := gcManager.CheckStaleCheckpointTs(cCtx, "cfID", 10) c.Assert(cerror.ErrGCTTLExceeded.Equal(errors.Cause(err)), check.IsTrue) c.Assert(cerror.ChangefeedFastFailError(err), check.IsTrue) - err = gcManager.CheckStaleCheckpointTs(ctx, "cfID", oracle.GoTimeToTS(time.Now())) + err = gcManager.CheckStaleCheckpointTs(cCtx, "cfID", oracle.GoTimeToTS(time.Now())) c.Assert(err, check.IsNil) gcManager.isTiCDCBlockGC = false gcManager.lastSafePointTs = 20 - err = gcManager.CheckStaleCheckpointTs(ctx, "cfID", 10) + err = gcManager.CheckStaleCheckpointTs(cCtx, "cfID", 10) c.Assert(cerror.ErrSnapshotLostByGC.Equal(errors.Cause(err)), check.IsTrue) c.Assert(cerror.ChangefeedFastFailError(err), check.IsTrue) }