global sort: lower memLimit and other memory control change (#50640)
ref #49829, close #50753, close #50755
lance6716 authored Jan 30, 2024
1 parent 556c0d4 commit 44787d6
Showing 9 changed files with 82 additions and 32 deletions.
4 changes: 3 additions & 1 deletion br/pkg/lightning/backend/external/byte_reader.go
@@ -29,7 +29,7 @@ import (
var (
// ConcurrentReaderBufferSizePerConc is the buffer size for concurrent reader per
// concurrency.
ConcurrentReaderBufferSizePerConc = int(4 * size.MB)
ConcurrentReaderBufferSizePerConc = int(2 * size.MB)
readAllDataConcThreshold = uint64(16)
)

@@ -307,6 +307,8 @@ func (r *byteReader) reload() error {
case io.ErrUnexpectedEOF:
// The last batch.
r.curBuf[0] = r.curBuf[0][:n]
case context.Canceled:
return err
default:
r.logger.Warn("other error during read", zap.Error(err))
return err
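
The new context.Canceled case returns early instead of falling through to the default branch, so cancellation during shutdown no longer produces a spurious "other error during read" warning. A minimal sketch of the same classification, using illustrative names rather than the real byteReader fields:

```go
package sketch

import (
	"context"
	"io"

	"go.uber.org/zap"
)

// classifyReadErr mirrors the error handling in byteReader.reload (names here
// are illustrative, not the actual implementation): a short read keeps the
// bytes it got, cancellation is propagated quietly, and only genuinely
// unexpected errors are logged.
func classifyReadErr(logger *zap.Logger, buf []byte, n int, err error) ([]byte, error) {
	switch err {
	case nil:
		return buf, nil
	case io.ErrUnexpectedEOF:
		// the last batch: keep only the bytes actually read
		return buf[:n], nil
	case context.Canceled:
		// expected when the caller cancels; no warning needed
		return nil, err
	default:
		logger.Warn("other error during read", zap.Error(err))
		return nil, err
	}
}
```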
44 changes: 26 additions & 18 deletions br/pkg/lightning/backend/external/engine.go
@@ -23,6 +23,7 @@ import (
"time"

"github.com/cockroachdb/pebble"
"github.com/docker/go-units"
"github.com/jfcg/sorty/v2"
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
@@ -62,11 +63,21 @@ type memKVsAndBuffers struct {
droppedSizePerFile []int
}

func (b *memKVsAndBuffers) build() {
func (b *memKVsAndBuffers) build(ctx context.Context) {
sumKVCnt := 0
for _, keys := range b.keysPerFile {
sumKVCnt += len(keys)
}
b.droppedSize = 0
for _, size := range b.droppedSizePerFile {
b.droppedSize += size
}
b.droppedSizePerFile = nil

logutil.Logger(ctx).Info("building memKVsAndBuffers",
zap.Int("sumKVCnt", sumKVCnt),
zap.Int("droppedSize", b.droppedSize))

b.keys = make([][]byte, 0, sumKVCnt)
b.values = make([][]byte, 0, sumKVCnt)
for i := range b.keysPerFile {
@@ -77,12 +88,6 @@ func (b *memKVsAndBuffers) build() {
}
b.keysPerFile = nil
b.valuesPerFile = nil

b.droppedSize = 0
for _, size := range b.droppedSizePerFile {
b.droppedSize += size
}
b.droppedSizePerFile = nil
}

// Engine stored sorted key/value pairs in an external storage.
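
build now takes a context so it can log the total KV count and dropped size before the merged slices are allocated; the dropped-size accounting is also moved ahead of the allocation. A rough sketch of this pre-count, log, pre-allocate pattern, with simplified types standing in for the real struct:

```go
package sketch

import (
	"context"

	"github.com/pingcap/tidb/pkg/util/logutil"
	"go.uber.org/zap"
)

// flattenKVs is an illustrative reduction of memKVsAndBuffers.build
// (keys only, no value buffers): count first, log, then allocate once.
func flattenKVs(ctx context.Context, keysPerFile [][][]byte, droppedSizePerFile []int) (keys [][]byte, droppedSize int) {
	sumKVCnt := 0
	for _, ks := range keysPerFile {
		sumKVCnt += len(ks)
	}
	for _, size := range droppedSizePerFile {
		droppedSize += size
	}
	// log before the big allocation so the numbers are visible even if the
	// allocation itself pushes the process over its memory budget
	logutil.Logger(ctx).Info("building memKVsAndBuffers",
		zap.Int("sumKVCnt", sumKVCnt),
		zap.Int("droppedSize", droppedSize))

	keys = make([][]byte, 0, sumKVCnt)
	for i := range keysPerFile {
		keys = append(keys, keysPerFile[i]...)
		keysPerFile[i] = nil // release per-file slice headers as we go
	}
	return keys, droppedSize
}
```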
@@ -121,7 +126,7 @@ type Engine struct {
importedKVCount *atomic.Int64
}

const memLimit = 16 * 1024 * 1024 * 1024
const memLimit = 12 * units.GiB

// NewExternalEngine creates an (external) engine.
func NewExternalEngine(
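
The limit drops from 16 GiB to 12 GiB and is now written with go-units constants instead of a hand-multiplied literal. A hedged sketch of how such a budget could back a membuf.Limiter (the NewLimiter call is the membuf API shown later in this commit; wiring the constant into a limiter here is an illustration, not necessarily how NewExternalEngine uses it):

```go
package sketch

import (
	"github.com/docker/go-units"

	"github.com/pingcap/tidb/br/pkg/membuf"
)

// memLimit mirrors the constant above: 12 GiB expressed via go-units
// (units.GiB == 1024 * 1024 * 1024) instead of a hand-multiplied literal.
const memLimit = 12 * units.GiB

// newEngineMemLimiter is an illustrative helper (not part of the commit)
// showing how a fixed budget can back a membuf.Limiter that data loaders
// Acquire from before reading and Release to after ingestion.
func newEngineMemLimiter() *membuf.Limiter {
	return membuf.NewLimiter(memLimit)
}
```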
@@ -219,12 +224,14 @@ func getFilesReadConcurrency(
for i := range statsFiles {
result[i] = (endOffs[i] - startOffs[i]) / uint64(ConcurrentReaderBufferSizePerConc)
result[i] = max(result[i], 1)
logutil.Logger(ctx).Info("found hotspot file in getFilesReadConcurrency",
zap.String("filename", statsFiles[i]),
zap.Uint64("startOffset", startOffs[i]),
zap.Uint64("endOffset", endOffs[i]),
zap.Uint64("expected concurrency", result[i]),
)
if result[i] > 1 {
logutil.Logger(ctx).Info("found hotspot file in getFilesReadConcurrency",
zap.String("filename", statsFiles[i]),
zap.Uint64("startOffset", startOffs[i]),
zap.Uint64("endOffset", endOffs[i]),
zap.Uint64("expected concurrency", result[i]),
)
}
}
return result, startOffs, nil
}
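
Each file's read concurrency is its key-range span divided by the per-concurrency buffer size (now 2 MiB, halved above), floored at 1; the log line is now emitted only for genuine hotspots that need more than one concurrent reader. A simplified sketch of that calculation with hypothetical names:

```go
package sketch

import (
	"context"

	"github.com/pingcap/tidb/pkg/util/logutil"
	"go.uber.org/zap"
)

// bufferSizePerConc stands in for ConcurrentReaderBufferSizePerConc
// (2 MiB after this commit).
const bufferSizePerConc = 2 * 1024 * 1024

// readConcurrencyForFiles is an illustrative version of the loop in
// getFilesReadConcurrency: concurrency per file = span / buffer size,
// at least 1, and only files needing more than one reader are logged.
func readConcurrencyForFiles(ctx context.Context, files []string, startOffs, endOffs []uint64) []uint64 {
	result := make([]uint64, len(files))
	for i := range files {
		result[i] = (endOffs[i] - startOffs[i]) / uint64(bufferSizePerConc)
		result[i] = max(result[i], 1)
		if result[i] > 1 {
			logutil.Logger(ctx).Info("found hotspot file",
				zap.String("filename", files[i]),
				zap.Uint64("expected concurrency", result[i]))
		}
	}
	return result
}
```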
@@ -251,7 +258,7 @@ func (e *Engine) loadBatchRegionData(ctx context.Context, startKey, endKey []byt
if err != nil {
return err
}
e.memKVsAndBuffers.build()
e.memKVsAndBuffers.build(ctx)

readSecond := time.Since(readStart).Seconds()
readDurHist.Observe(readSecond)
@@ -323,9 +330,8 @@ func (e *Engine) LoadIngestData(
regionRanges []common.Range,
outCh chan<- common.DataAndRange,
) error {
// currently we assume the region size is 96MB and will download 96MB*32 = 3GB
// data at once
regionBatchSize := 32
// try to make every worker busy for each batch
regionBatchSize := e.workerConcurrency
failpoint.Inject("LoadIngestDataBatchSize", func(val failpoint.Value) {
regionBatchSize = val.(int)
})
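
Instead of a fixed batch of 32 regions (sized for the old 96 MiB-region assumption), each batch now loads one region range per worker, so resident memory scales with the configured concurrency rather than a hard-coded constant. A rough sketch of batching ranges by worker count (illustrative names; the real loop also goes through a failpoint and output channels):

```go
package sketch

// batchRanges is an illustrative helper: split regionRanges into batches of
// regionBatchSize (== worker concurrency after this commit) so that each
// batch keeps every worker busy without loading more regions than workers.
func batchRanges[T any](regionRanges []T, workerConcurrency int) [][]T {
	regionBatchSize := workerConcurrency
	batches := make([][]T, 0, (len(regionRanges)+regionBatchSize-1)/regionBatchSize)
	for start := 0; start < len(regionRanges); start += regionBatchSize {
		end := min(start+regionBatchSize, len(regionRanges))
		batches = append(batches, regionRanges[start:end])
	}
	return batches
}
```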
@@ -676,6 +682,8 @@ func (m *MemoryIngestData) IncRef() {
// DecRef implements IngestData.DecRef.
func (m *MemoryIngestData) DecRef() {
if m.refCnt.Dec() == 0 {
m.keys = nil
m.values = nil
for _, b := range m.memBuf {
b.Destroy()
}
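
When the last reference is dropped, the key and value slice headers are now cleared along with destroying the memory buffers, so the backing arrays become garbage-collectable as soon as ingestion finishes instead of living as long as the MemoryIngestData value. A minimal sketch of the ref-count pattern with simplified fields (not the real struct):

```go
package sketch

import "go.uber.org/atomic"

// ingestData is a simplified stand-in for MemoryIngestData: a ref-counted
// holder of loaded KV slices.
type ingestData struct {
	refCnt       atomic.Int64
	keys, values [][]byte
}

func (d *ingestData) IncRef() { d.refCnt.Inc() }

// DecRef drops one reference; on the last release it also drops the slice
// headers so the underlying arrays can be reclaimed by the GC immediately.
func (d *ingestData) DecRef() {
	if d.refCnt.Dec() == 0 {
		d.keys = nil
		d.values = nil
	}
}
```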
2 changes: 1 addition & 1 deletion br/pkg/lightning/backend/external/merge_v2.go
@@ -129,7 +129,7 @@ func MergeOverlappingFilesV2(
logutil.Logger(ctx).Warn("read all data failed", zap.Error(err1))
return
}
loaded.build()
loaded.build(ctx)
readTime := time.Since(now)
now = time.Now()
sorty.MaxGor = uint64(concurrency)
2 changes: 1 addition & 1 deletion br/pkg/lightning/backend/external/testutil.go
@@ -82,7 +82,7 @@ func testReadAndCompare(
loaded,
)
require.NoError(t, err)
loaded.build()
loaded.build(ctx)

// check kvs sorted
sorty.MaxGor = uint64(8)
6 changes: 2 additions & 4 deletions br/pkg/lightning/backend/local/localhelper.go
@@ -410,10 +410,8 @@ func (local *Backend) BatchSplitRegions(
// Wait for a while until the regions successfully splits.
ok, err2 := local.hasRegion(ctx, region.Region.Id)
if !ok || err2 != nil {
if err2 == nil {
log.FromContext(ctx).Warn("split region failed", zap.Uint64("regionID", region.Region.Id))
} else {
log.FromContext(ctx).Warn("split region failed", zap.Uint64("regionID", region.Region.Id), zap.Error(err2))
if err2 != nil {
log.FromContext(ctx).Warn("split region failed with error", zap.Uint64("regionID", region.Region.Id), zap.Error(err2))
}
retryRegions = append(retryRegions, region)
continue
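
The change above keeps retrying regions that are not yet visible after a split, but only logs when hasRegion actually returned an error; the plain "not found yet" case is left silent since it is expected while the split settles. An illustrative condensation of the check (not the real loop body):

```go
package sketch

import (
	"context"

	"github.com/pingcap/tidb/br/pkg/lightning/log"
	"go.uber.org/zap"
)

// shouldRetryRegion sketches the decision in BatchSplitRegions: a region that
// cannot be confirmed yet goes back into the retry list, and only real lookup
// errors are logged.
func shouldRetryRegion(ctx context.Context, regionID uint64, found bool, err error) bool {
	if found && err == nil {
		return false
	}
	if err != nil {
		log.FromContext(ctx).Warn("split region failed with error",
			zap.Uint64("regionID", regionID), zap.Error(err))
	}
	return true
}
```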
4 changes: 4 additions & 0 deletions br/pkg/membuf/BUILD.bazel
@@ -8,6 +8,10 @@ go_library(
],
importpath = "github.com/pingcap/tidb/br/pkg/membuf",
visibility = ["//visibility:public"],
deps = [
"@com_github_pingcap_log//:log",
"@org_uber_go_zap//:zap",
],
)

go_test(
27 changes: 21 additions & 6 deletions br/pkg/membuf/limiter.go
@@ -14,20 +14,26 @@

package membuf

import "sync"
import (
"sync"

"github.com/pingcap/log"
"go.uber.org/zap"
)

// Limiter will block on Acquire if the number it has acquired and not released
// exceeds the limit.
type Limiter struct {
limit int
mu sync.Mutex
waitNums []int
waitChs []chan struct{}
initLimit int
limit int
mu sync.Mutex
waitNums []int
waitChs []chan struct{}
}

// NewLimiter creates a new Limiter with the given limit.
func NewLimiter(limit int) *Limiter {
return &Limiter{limit: limit}
return &Limiter{limit: limit, initLimit: limit}
}

// Acquire acquires n tokens from the limiter. If the number of tokens acquired
@@ -56,6 +62,15 @@ func (l *Limiter) Release(n int) {
l.mu.Lock()

l.limit += n
if l.limit > l.initLimit {
log.Error(
"limit overflow",
zap.Int("limit", l.limit),
zap.Int("initLimit", l.initLimit),
zap.Stack("stack"),
)
}

for len(l.waitNums) > 0 && l.limit >= l.waitNums[0] {
l.limit -= l.waitNums[0]
close(l.waitChs[0])
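
The limiter now remembers its initial budget so that a mismatched Acquire/Release pair (releasing more than was acquired) is logged loudly with a stack trace instead of silently inflating the limit. A hedged usage sketch of the intended discipline, assuming the membuf API shown above:

```go
package sketch

import "github.com/pingcap/tidb/br/pkg/membuf"

// loadWithBudget sketches the intended Acquire/Release pairing: every
// Acquire(n) is matched by exactly one Release(n), so the limit can never
// climb above the initial budget and trigger the new "limit overflow" log.
func loadWithBudget(limiter *membuf.Limiter, sizes []int, load func(n int)) {
	for _, n := range sizes {
		limiter.Acquire(n) // blocks while the outstanding total would exceed the budget
		load(n)
		limiter.Release(n) // release exactly what was acquired
	}
}
```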
1 change: 1 addition & 0 deletions br/pkg/storage/BUILD.bazel
@@ -31,6 +31,7 @@ go_library(
"//pkg/sessionctx/variable",
"//pkg/util",
"//pkg/util/intest",
"//pkg/util/logutil",
"//pkg/util/prefetch",
"@com_github_aliyun_alibaba_cloud_sdk_go//sdk/auth/credentials",
"@com_github_aliyun_alibaba_cloud_sdk_go//sdk/auth/credentials/providers",
24 changes: 23 additions & 1 deletion br/pkg/storage/gcs.go
@@ -19,6 +19,7 @@ import (
berrors "github.com/pingcap/tidb/br/pkg/errors"
"github.com/pingcap/tidb/pkg/util"
"github.com/pingcap/tidb/pkg/util/intest"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/pingcap/tidb/pkg/util/prefetch"
"github.com/spf13/pflag"
"go.uber.org/atomic"
@@ -416,6 +417,8 @@ skipHandleCred:

// Reset resets the GCS storage.
func (s *GCSStorage) Reset(ctx context.Context) error {
logutil.Logger(ctx).Info("resetting gcs storage")

for _, client := range s.clients {
_ = client.Close()
}
@@ -451,6 +454,10 @@ func shouldRetry(err error) bool {
return true
}

if err == nil {
return false
}

// workaround for https://github.com/googleapis/google-cloud-go/issues/7440
if e := (http2.StreamError{}); goerrors.As(err, &e) {
if e.Code == http2.ErrCodeInternal {
@@ -467,7 +474,22 @@
}
}

if err != nil {
errMsg := err.Error()
// workaround for strange unknown errors
retryableErrMsg := []string{
"http2: client connection force closed via ClientConn.Close",
"broken pipe",
}

for _, msg := range retryableErrMsg {
if strings.Contains(errMsg, msg) {
log.Warn("retrying gcs request", zap.Error(err))
return true
}
}

// just log the new unknown error, in case we can add it to this function
if !goerrors.Is(err, context.Canceled) {
log.Warn("other error when requesting gcs",
zap.Error(err),
zap.String("info", fmt.Sprintf("type: %T, value: %#v", err, err)))
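
shouldRetry now returns false early for nil errors, retries two additional error strings that surface as opaque transport failures ("http2: client connection force closed via ClientConn.Close" and "broken pipe"), and logs any remaining unknown error, except context cancellation, so it can be classified later. A simplified sketch of the string-matching portion (the real function also handles typed googleapi and http2 errors):

```go
package sketch

import (
	"context"
	goerrors "errors"
	"strings"

	"github.com/pingcap/log"
	"go.uber.org/zap"
)

// retryableByMessage is an illustrative reduction of shouldRetry: nil is
// never retried, known transient transport messages are retried, and any
// other unknown error (except cancellation) is logged for future triage.
func retryableByMessage(err error) bool {
	if err == nil {
		return false
	}
	retryableErrMsg := []string{
		"http2: client connection force closed via ClientConn.Close",
		"broken pipe",
	}
	for _, msg := range retryableErrMsg {
		if strings.Contains(err.Error(), msg) {
			log.Warn("retrying gcs request", zap.Error(err))
			return true
		}
	}
	if !goerrors.Is(err, context.Canceled) {
		log.Warn("other error when requesting gcs", zap.Error(err))
	}
	return false
}
```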
