From 0898ce8e796a39e4f0391c37ac15b760fa1265c5 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Fri, 12 Apr 2019 16:43:55 +0800 Subject: [PATCH 1/3] executor: trace and control memory usage in DistSQL layer (#10003) --- distsql/distsql.go | 5 + distsql/distsql_test.go | 3 + distsql/request_builder.go | 10 + distsql/select_result.go | 16 ++ executor/chunk_size_control_test.go | 235 ++++++++++++++++++++++++ executor/distsql.go | 2 + executor/executor_required_rows_test.go | 1 + executor/executor_test.go | 94 +++++++++- executor/table_reader.go | 1 + kv/kv.go | 5 + sessionctx/variable/session.go | 3 + sessionctx/variable/tidb_vars.go | 1 + store/tikv/coprocessor.go | 40 ++++ 13 files changed, 415 insertions(+), 1 deletion(-) create mode 100644 executor/chunk_size_control_test.go diff --git a/distsql/distsql.go b/distsql/distsql.go index 2abf907242141..119c3145e5bb1 100644 --- a/distsql/distsql.go +++ b/distsql/distsql.go @@ -45,6 +45,10 @@ func Select(ctx context.Context, sctx sessionctx.Context, kvReq *kv.Request, fie return nil, errors.Trace(err) } + // kvReq.MemTracker is used to trace and control memory usage in DistSQL layer; + // for streamResult, since it is a pipeline which has no buffer, it's not necessary to trace it; + // for selectResult, we just use the kvReq.MemTracker prepared for co-processor + // instead of creating a new one for simplification. if kvReq.Streaming { return &streamResult{ resp: resp, @@ -69,6 +73,7 @@ func Select(ctx context.Context, sctx sessionctx.Context, kvReq *kv.Request, fie ctx: sctx, feedback: fb, sqlType: label, + memTracker: kvReq.MemTracker, }, nil } diff --git a/distsql/distsql_test.go b/distsql/distsql_test.go index bdcffbd45bffe..812660c32f8ff 100644 --- a/distsql/distsql_test.go +++ b/distsql/distsql_test.go @@ -308,6 +308,9 @@ func (r *mockResultSubset) GetExecDetails() *execdetails.ExecDetails { return &execdetails.ExecDetails{} } +// MemSize implements kv.ResultSubset interface. +func (r *mockResultSubset) MemSize() int64 { return int64(cap(r.data)) } + func populateBuffer() []byte { numCols := 4 numRows := 1024 diff --git a/distsql/request_builder.go b/distsql/request_builder.go index f66a83a5721d3..3fbce7e02b51a 100644 --- a/distsql/request_builder.go +++ b/distsql/request_builder.go @@ -19,12 +19,14 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/codec" + "github.com/pingcap/tidb/util/memory" "github.com/pingcap/tidb/util/ranger" "github.com/pingcap/tipb/go-tipb" ) @@ -41,6 +43,14 @@ func (builder *RequestBuilder) Build() (*kv.Request, error) { return &builder.Request, errors.Trace(builder.err) } +// SetMemTracker sets a memTracker for this request. +func (builder *RequestBuilder) SetMemTracker(sctx sessionctx.Context, label string) *RequestBuilder { + t := memory.NewTracker(label, sctx.GetSessionVars().MemQuotaDistSQL) + t.AttachTo(sctx.GetSessionVars().StmtCtx.MemTracker) + builder.Request.MemTracker = t + return builder +} + // SetTableRanges sets "KeyRanges" for "kv.Request" by converting "tableRanges" // to "KeyRanges" firstly. 
func (builder *RequestBuilder) SetTableRanges(tid int64, tableRanges []*ranger.Range, fb *statistics.QueryFeedback) *RequestBuilder { diff --git a/distsql/select_result.go b/distsql/select_result.go index 59e40f26b1482..797c977e68f57 100644 --- a/distsql/select_result.go +++ b/distsql/select_result.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/codec" + "github.com/pingcap/tidb/util/memory" "github.com/pingcap/tipb/go-tipb" "golang.org/x/net/context" ) @@ -68,6 +69,8 @@ type selectResult struct { feedback *statistics.QueryFeedback partialCount int64 // number of partial results. sqlType string + + memTracker *memory.Tracker } func (r *selectResult) Fetch(ctx context.Context) { @@ -91,6 +94,10 @@ func (r *selectResult) fetch(ctx context.Context) { return } + if r.memTracker != nil { + r.memTracker.Consume(int64(resultSubset.MemSize())) + } + select { case r.results <- resultWithErr{result: resultSubset}: case <-r.closed: @@ -141,15 +148,24 @@ func (r *selectResult) getSelectResp() error { if re.err != nil { return errors.Trace(re.err) } + if r.memTracker != nil && r.selectResp != nil { + r.memTracker.Consume(-int64(r.selectResp.Size())) + } if re.result == nil { r.selectResp = nil return nil } + if r.memTracker != nil { + r.memTracker.Consume(-int64(re.result.MemSize())) + } r.selectResp = new(tipb.SelectResponse) err := r.selectResp.Unmarshal(re.result.GetData()) if err != nil { return errors.Trace(err) } + if r.memTracker != nil && r.selectResp != nil { + r.memTracker.Consume(int64(r.selectResp.Size())) + } if err := r.selectResp.Error; err != nil { return terror.ClassTiKV.New(terror.ErrCode(err.Code), err.Msg) } diff --git a/executor/chunk_size_control_test.go b/executor/chunk_size_control_test.go new file mode 100644 index 0000000000000..d143f08bd831c --- /dev/null +++ b/executor/chunk_size_control_test.go @@ -0,0 +1,235 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package executor_test + +import ( + "context" + "fmt" + "strings" + "sync" + "time" + + . 
"github.com/pingcap/check" + "github.com/pingcap/parser/model" + "github.com/pingcap/tidb/domain" + "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/session" + "github.com/pingcap/tidb/store/mockstore" + "github.com/pingcap/tidb/store/mockstore/mocktikv" + "github.com/pingcap/tidb/store/tikv" + "github.com/pingcap/tidb/store/tikv/tikvrpc" + "github.com/pingcap/tidb/tablecodec" + "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/codec" + "github.com/pingcap/tidb/util/testkit" +) + +var ( + _ = Suite(&testChunkSizeControlSuite{}) +) + +type testSlowClient struct { + sync.RWMutex + tikv.Client + regionDelay map[uint64]time.Duration +} + +func (c *testSlowClient) SendRequest(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (*tikvrpc.Response, error) { + regionID := req.RegionId + delay := c.GetDelay(regionID) + if req.Type == tikvrpc.CmdCop && delay > 0 { + time.Sleep(delay) + } + return c.Client.SendRequest(ctx, addr, req, timeout) +} + +func (c *testSlowClient) SetDelay(regionID uint64, dur time.Duration) { + c.Lock() + defer c.Unlock() + c.regionDelay[regionID] = dur +} + +func (c *testSlowClient) GetDelay(regionID uint64) time.Duration { + c.RLock() + defer c.RUnlock() + return c.regionDelay[regionID] +} + +// manipulateCluster splits this cluster's region by splitKeys and returns regionIDs after split +func manipulateCluster(cluster *mocktikv.Cluster, splitKeys [][]byte) []uint64 { + regions := cluster.GetAllRegions() + if len(regions) != 1 { + panic("this cluster has already split") + } + + allRegionIDs := []uint64{regions[0].Meta.Id} + for i, key := range splitKeys { + newRegionID, newPeerID := cluster.AllocID(), cluster.AllocID() + cluster.Split(allRegionIDs[i], newRegionID, key, []uint64{newPeerID}, newPeerID) + allRegionIDs = append(allRegionIDs, newRegionID) + } + return allRegionIDs +} + +func generateTableSplitKeyForInt(tid int64, splitNum []int) [][]byte { + results := make([][]byte, 0, len(splitNum)) + for _, num := range splitNum { + results = append(results, tablecodec.EncodeRowKey(tid, codec.EncodeInt(nil, int64(num)))) + } + return results +} + +func generateIndexSplitKeyForInt(tid, idx int64, splitNum []int) [][]byte { + results := make([][]byte, 0, len(splitNum)) + for _, num := range splitNum { + d := new(types.Datum) + d.SetInt64(int64(num)) + b, err := codec.EncodeKey(nil, nil, *d) + if err != nil { + panic(err) + } + results = append(results, tablecodec.EncodeIndexSeekKey(tid, idx, b)) + } + return results +} + +type testChunkSizeControlKit struct { + store kv.Storage + dom *domain.Domain + tk *testkit.TestKit + client *testSlowClient + cluster *mocktikv.Cluster +} + +type testChunkSizeControlSuite struct { + m map[string]*testChunkSizeControlKit +} + +func (s *testChunkSizeControlSuite) SetUpSuite(c *C) { + tableSQLs := map[string]string{} + tableSQLs["Limit&TableScan"] = "create table t (a int, primary key (a))" + tableSQLs["Limit&IndexScan"] = "create table t (a int, index idx_a(a))" + + s.m = make(map[string]*testChunkSizeControlKit) + for name, sql := range tableSQLs { + // BootstrapSession is not thread-safe, so we have to prepare all resources in SetUp. 
+ kit := new(testChunkSizeControlKit) + s.m[name] = kit + kit.client = &testSlowClient{regionDelay: make(map[uint64]time.Duration)} + kit.cluster = mocktikv.NewCluster() + mocktikv.BootstrapWithSingleStore(kit.cluster) + + var err error + kit.store, err = mockstore.NewMockTikvStore( + mockstore.WithCluster(kit.cluster), + mockstore.WithHijackClient(func(c tikv.Client) tikv.Client { + kit.client.Client = c + return kit.client + }), + ) + c.Assert(err, IsNil) + + // init domain + kit.dom, err = session.BootstrapSession(kit.store) + c.Assert(err, IsNil) + + // create the test table + kit.tk = testkit.NewTestKitWithInit(c, kit.store) + kit.tk.MustExec(sql) + } +} + +func (s *testChunkSizeControlSuite) getKit(name string) ( + kv.Storage, *domain.Domain, *testkit.TestKit, *testSlowClient, *mocktikv.Cluster) { + x := s.m[name] + return x.store, x.dom, x.tk, x.client, x.cluster +} + +func (s *testChunkSizeControlSuite) TestLimitAndTableScan(c *C) { + c.Skip("not stable because coprocessor may result in goroutine leak") + _, dom, tk, client, cluster := s.getKit("Limit&TableScan") + defer client.Close() + tbl, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + c.Assert(err, IsNil) + tid := tbl.Meta().ID + + // construct two regions split by 100 + splitKeys := generateTableSplitKeyForInt(tid, []int{100}) + regionIDs := manipulateCluster(cluster, splitKeys) + + noDelayThreshold := time.Millisecond * 100 + delayDuration := time.Second + delayThreshold := delayDuration * 9 / 10 + tk.MustExec("insert into t values (1)") // insert one record into region1, and set a delay duration + client.SetDelay(regionIDs[0], delayDuration) + + results := tk.MustQuery("explain analyze select * from t where t.a > 0 and t.a < 200 limit 1") + cost := s.parseTimeCost(c, results.Rows()[0]) + c.Assert(cost, Not(Less), delayThreshold) // have to wait for region1 + + tk.MustExec("insert into t values (101)") // insert one record into region2 + results = tk.MustQuery("explain analyze select * from t where t.a > 0 and t.a < 200 limit 1") + cost = s.parseTimeCost(c, results.Rows()[0]) + c.Assert(cost, Less, noDelayThreshold) // region2 return quickly + + results = tk.MustQuery("explain analyze select * from t where t.a > 0 and t.a < 200 limit 2") + cost = s.parseTimeCost(c, results.Rows()[0]) + c.Assert(cost, Not(Less), delayThreshold) // have to wait +} + +func (s *testChunkSizeControlSuite) TestLimitAndIndexScan(c *C) { + c.Skip("not stable because coprocessor may result in goroutine leak") + _, dom, tk, client, cluster := s.getKit("Limit&IndexScan") + defer client.Close() + tbl, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + c.Assert(err, IsNil) + tid := tbl.Meta().ID + idx := tbl.Meta().Indices[0].ID + + // construct two regions split by 100 + splitKeys := generateIndexSplitKeyForInt(tid, idx, []int{100}) + regionIDs := manipulateCluster(cluster, splitKeys) + + noDelayThreshold := time.Millisecond * 100 + delayDuration := time.Second + delayThreshold := delayDuration * 9 / 10 + tk.MustExec("insert into t values (1)") // insert one record into region1, and set a delay duration + client.SetDelay(regionIDs[0], delayDuration) + + results := tk.MustQuery("explain analyze select * from t where t.a > 0 and t.a < 200 limit 1") + cost := s.parseTimeCost(c, results.Rows()[0]) + c.Assert(cost, Not(Less), delayThreshold) // have to wait for region1 + + tk.MustExec("insert into t values (101)") // insert one record into region2 + results = tk.MustQuery("explain analyze 
select * from t where t.a > 0 and t.a < 200 limit 1") + cost = s.parseTimeCost(c, results.Rows()[0]) + c.Assert(cost, Less, noDelayThreshold) // region2 return quickly + + results = tk.MustQuery("explain analyze select * from t where t.a > 0 and t.a < 200 limit 2") + cost = s.parseTimeCost(c, results.Rows()[0]) + c.Assert(cost, Not(Less), delayThreshold) // have to wait +} + +func (s *testChunkSizeControlSuite) parseTimeCost(c *C, line []interface{}) time.Duration { + lineStr := fmt.Sprintf("%v", line) + idx := strings.Index(lineStr, "time:") + c.Assert(idx, Not(Equals), -1) + lineStr = lineStr[idx+len("time:"):] + idx = strings.Index(lineStr, ",") + c.Assert(idx, Not(Equals), -1) + timeStr := lineStr[:idx] + d, err := time.ParseDuration(timeStr) + c.Assert(err, IsNil) + return d +} diff --git a/executor/distsql.go b/executor/distsql.go index 3fd656e38c13d..bf4794fa2d511 100644 --- a/executor/distsql.go +++ b/executor/distsql.go @@ -291,6 +291,7 @@ func (e *IndexReaderExecutor) open(ctx context.Context, kvRanges []kv.KeyRange) SetKeepOrder(e.keepOrder). SetStreaming(e.streaming). SetFromSessionVars(e.ctx.GetSessionVars()). + SetMemTracker(e.ctx, "IndexReaderDistSQLTracker"). Build() if err != nil { e.feedback.Invalidate() @@ -421,6 +422,7 @@ func (e *IndexLookUpExecutor) startIndexWorker(ctx context.Context, kvRanges []k SetKeepOrder(e.keepOrder). SetStreaming(e.indexStreaming). SetFromSessionVars(e.ctx.GetSessionVars()). + SetMemTracker(e.ctx, "IndexLookupDistSQLTracker"). Build() if err != nil { return errors.Trace(err) diff --git a/executor/executor_required_rows_test.go b/executor/executor_required_rows_test.go index b16ee95404c09..ec1846d754d7d 100644 --- a/executor/executor_required_rows_test.go +++ b/executor/executor_required_rows_test.go @@ -526,6 +526,7 @@ func (s *testExecSuite) TestProjectionUnparallelRequiredRows(c *C) { } func (s *testExecSuite) TestProjectionParallelRequiredRows(c *C) { + c.Skip("not stable because of goroutine schedule") maxChunkSize := defaultCtx().GetSessionVars().MaxChunkSize testCases := []struct { totalRows int diff --git a/executor/executor_test.go b/executor/executor_test.go index bc2163d8ee821..e47b7e01e585b 100644 --- a/executor/executor_test.go +++ b/executor/executor_test.go @@ -30,6 +30,7 @@ import ( "github.com/pingcap/errors" gofail "github.com/pingcap/gofail/runtime" pb "github.com/pingcap/kvproto/pkg/kvrpcpb" + "github.com/pingcap/log" "github.com/pingcap/parser" "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" @@ -63,6 +64,8 @@ import ( "github.com/pingcap/tidb/util/testutil" "github.com/pingcap/tidb/util/timeutil" "github.com/pingcap/tipb/go-tipb" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" "golang.org/x/net/context" ) @@ -77,6 +80,7 @@ var _ = Suite(&testSuite{}) var _ = Suite(&testContextOptionSuite{}) var _ = Suite(&testBypassSuite{}) var _ = Suite(&testUpdateSuite{}) +var _ = Suite(&testOOMSuite{}) type testSuite struct { cluster *mocktikv.Cluster @@ -2834,7 +2838,7 @@ func (s *testSuite) TestUnsignedPk(c *C) { tk.MustExec("drop table if exists t") tk.MustExec("create table t(id bigint unsigned primary key)") - var num1, num2 uint64 = math.MaxInt64 + 1, math.MaxInt64 + 2 + var num1, num2 uint64 = math.MaxInt64+1, math.MaxInt64+2 tk.MustExec(fmt.Sprintf("insert into t values(%v), (%v), (1), (2)", num1, num2)) num1Str := strconv.FormatUint(num1, 10) num2Str := strconv.FormatUint(num2, 10) @@ -3452,3 +3456,91 @@ func (s *testSuite) TestDoSubquery(c *C) { c.Assert(err, IsNil, Commentf("err %v", err)) c.Assert(r, 
IsNil, Commentf("result of Do not empty")) } + +type testOOMSuite struct { + store kv.Storage + do *domain.Domain + oom *oomCapturer +} + +func (s *testOOMSuite) SetUpSuite(c *C) { + c.Skip("log.ReplaceGlobals(lg, r) in registerHook() may result in data race") + testleak.BeforeTest() + s.registerHook() + var err error + s.store, err = mockstore.NewMockTikvStore() + c.Assert(err, IsNil) + session.SetSchemaLease(0) + domain.RunAutoAnalyze = false + s.do, err = session.BootstrapSession(s.store) + c.Assert(err, IsNil) +} + +func (s *testOOMSuite) registerHook() { + conf := &log.Config{Level: "info", File: log.FileLogConfig{}} + _, r, _ := log.InitLogger(conf) + s.oom = &oomCapturer{r.Core, ""} + lg := zap.New(s.oom) + log.ReplaceGlobals(lg, r) +} + +func (s *testOOMSuite) TestDistSQLMemoryControl(c *C) { + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t (id int, a int, b int, index idx_a(`a`))") + tk.MustExec("insert into t values (1,1,1), (2,2,2), (3,3,3)") + + s.oom.tracker = "" + tk.MustQuery("select * from t") + c.Assert(s.oom.tracker, Equals, "") + tk.Se.GetSessionVars().MemQuotaDistSQL = 1 + tk.MustQuery("select * from t") + c.Assert(s.oom.tracker, Equals, "TableReaderDistSQLTracker") + tk.Se.GetSessionVars().MemQuotaDistSQL = -1 + + s.oom.tracker = "" + tk.MustQuery("select a from t") + c.Assert(s.oom.tracker, Equals, "") + tk.Se.GetSessionVars().MemQuotaDistSQL = 1 + tk.MustQuery("select a from t use index(idx_a)") + c.Assert(s.oom.tracker, Equals, "IndexReaderDistSQLTracker") + tk.Se.GetSessionVars().MemQuotaDistSQL = -1 + + s.oom.tracker = "" + tk.MustQuery("select * from t") + c.Assert(s.oom.tracker, Equals, "") + tk.Se.GetSessionVars().MemQuotaDistSQL = 1 + tk.MustQuery("select * from t use index(idx_a)") + c.Assert(s.oom.tracker, Equals, "IndexLookupDistSQLTracker") + tk.Se.GetSessionVars().MemQuotaDistSQL = -1 +} + +type oomCapturer struct { + zapcore.Core + tracker string +} + +func (h *oomCapturer) Write(entry zapcore.Entry, fields []zapcore.Field) error { + if strings.Contains(entry.Message, "memory exceeds quota") { + err, _ := fields[0].Interface.(error) + str := err.Error() + begin := strings.Index(str, "8001]") + if begin == -1 { + panic("begin not found") + } + end := strings.Index(str, " holds") + if end == -1 { + panic("end not found") + } + h.tracker = str[begin+len("8001]") : end] + } + return nil +} + +func (h *oomCapturer) Check(e zapcore.Entry, ce *zapcore.CheckedEntry) *zapcore.CheckedEntry { + if h.Enabled(e.Level) { + return ce.AddCore(e, h) + } + return ce +} diff --git a/executor/table_reader.go b/executor/table_reader.go index 45aeef858860d..9c0e528b793e1 100644 --- a/executor/table_reader.go +++ b/executor/table_reader.go @@ -147,6 +147,7 @@ func (e *TableReaderExecutor) buildResp(ctx context.Context, ranges []*ranger.Ra SetKeepOrder(e.keepOrder). SetStreaming(e.streaming). SetFromSessionVars(e.ctx.GetSessionVars()). + SetMemTracker(e.ctx, "TableReaderDistSQLTracker"). 
Build() if err != nil { return nil, errors.Trace(err) diff --git a/kv/kv.go b/kv/kv.go index c8b7cdd71aaac..ebeebc98d862a 100644 --- a/kv/kv.go +++ b/kv/kv.go @@ -17,6 +17,7 @@ import ( "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/store/tikv/oracle" "github.com/pingcap/tidb/util/execdetails" + "github.com/pingcap/tidb/util/memory" "golang.org/x/net/context" ) @@ -209,6 +210,8 @@ type Request struct { // Streaming indicates using streaming API for this request, result in that one Next() // call would not corresponds to a whole region result. Streaming bool + // MemTracker is used to trace and control memory usage in co-processor layer. + MemTracker *memory.Tracker } // ResultSubset represents a result subset from a single storage unit. @@ -220,6 +223,8 @@ type ResultSubset interface { GetStartKey() Key // GetExecDetails gets the detail information. GetExecDetails() *execdetails.ExecDetails + // MemSize returns how many bytes of memory this result use for tracing memory usage. + MemSize() int64 } // Response represents the response returned from KV layer. diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 7fa00ae28afff..ff58d6670aa3b 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -381,6 +381,7 @@ func NewSessionVars() *SessionVars { MemQuotaIndexLookupReader: DefTiDBMemQuotaIndexLookupReader, MemQuotaIndexLookupJoin: DefTiDBMemQuotaIndexLookupJoin, MemQuotaNestedLoopApply: DefTiDBMemQuotaNestedLoopApply, + MemQuotaDistSQL: DefTiDBMemQuotaDistSQL, } vars.BatchSize = BatchSize{ IndexJoinBatchSize: DefIndexJoinBatchSize, @@ -732,6 +733,8 @@ type MemQuota struct { MemQuotaIndexLookupJoin int64 // MemQuotaNestedLoopApply defines the memory quota for a nested loop apply executor. MemQuotaNestedLoopApply int64 + // MemQuotaDistSQL defines the memory quota for all operators in DistSQL layer like co-processor and selectResult. + MemQuotaDistSQL int64 } // BatchSize defines batch size values. diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 0ce8a67eec7e3..63cc1c9d217f2 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -245,6 +245,7 @@ const ( DefTiDBMemQuotaIndexLookupReader = 32 << 30 // 32GB. DefTiDBMemQuotaIndexLookupJoin = 32 << 30 // 32GB. DefTiDBMemQuotaNestedLoopApply = 32 << 30 // 32GB. + DefTiDBMemQuotaDistSQL = 32 << 30 // 32GB. 
DefTiDBGeneralLog = 0 DefTiDBRetryLimit = 10 DefTiDBDisableTxnAutoRetry = false diff --git a/store/tikv/coprocessor.go b/store/tikv/coprocessor.go index 783e5df9ac2fc..c11b4718c2d0c 100644 --- a/store/tikv/coprocessor.go +++ b/store/tikv/coprocessor.go @@ -22,6 +22,7 @@ import ( "sync" "sync/atomic" "time" + "unsafe" "github.com/cznic/mathutil" "github.com/pingcap/errors" @@ -32,6 +33,7 @@ import ( "github.com/pingcap/tidb/store/tikv/tikvrpc" "github.com/pingcap/tidb/util/execdetails" "github.com/pingcap/tidb/util/logutil" + "github.com/pingcap/tidb/util/memory" "github.com/pingcap/tipb/go-tipb" "go.uber.org/zap" "golang.org/x/net/context" @@ -93,6 +95,7 @@ func (c *CopClient) Send(ctx context.Context, req *kv.Request, vars *kv.Variable concurrency: req.Concurrency, finishCh: make(chan struct{}), vars: vars, + memTracker: req.MemTracker, } it.tasks = tasks if it.concurrency > len(tasks) { @@ -370,6 +373,8 @@ type copIterator struct { wg sync.WaitGroup vars *kv.Variables + + memTracker *memory.Tracker } // copIteratorWorker receives tasks from copIteratorTaskSender, handles tasks and sends the copResponse to respChan. @@ -381,6 +386,8 @@ type copIteratorWorker struct { respChan chan<- *copResponse finishCh <-chan struct{} vars *kv.Variables + + memTracker *memory.Tracker } // copIteratorTaskSender sends tasks to taskCh then wait for the workers to exit. @@ -397,8 +404,14 @@ type copResponse struct { execdetails.ExecDetails startKey kv.Key err error + respSize int64 } +const ( + sizeofExecDetails = int(unsafe.Sizeof(execdetails.ExecDetails{})) + sizeofCommitDetails = int(unsafe.Sizeof(execdetails.CommitDetails{})) +) + // GetData implements the kv.ResultSubset GetData interface. func (rs *copResponse) GetData() []byte { return rs.pbResp.Data @@ -413,6 +426,25 @@ func (rs *copResponse) GetExecDetails() *execdetails.ExecDetails { return &rs.ExecDetails } +// MemSize returns how many bytes of memory this response use +func (rs *copResponse) MemSize() int64 { + if rs.respSize != 0 { + return rs.respSize + } + + // ignore rs.err + rs.respSize += int64(cap(rs.startKey)) + rs.respSize += int64(sizeofExecDetails) + if rs.CommitDetail != nil { + rs.respSize += int64(sizeofCommitDetails) + } + if rs.pbResp != nil { + // Using a approximate size since it's hard to get a accurate value. 
+ rs.respSize += int64(rs.pbResp.Size()) + } + return rs.respSize +} + const minLogCopTaskTime = 300 * time.Millisecond // run is a worker function that get a copTask from channel, handle it and @@ -453,6 +485,8 @@ func (it *copIterator) open(ctx context.Context) { respChan: it.respChan, finishCh: it.finishCh, vars: it.vars, + + memTracker: it.memTracker, } go worker.run(ctx) } @@ -486,6 +520,9 @@ func (sender *copIteratorTaskSender) run() { func (it *copIterator) recvFromRespCh(ctx context.Context, respCh <-chan *copResponse) (resp *copResponse, ok bool, exit bool) { select { case resp, ok = <-respCh: + if it.memTracker != nil && resp != nil { + it.memTracker.Consume(-int64(resp.MemSize())) + } case <-it.finishCh: exit = true case <-ctx.Done(): @@ -508,6 +545,9 @@ func (sender *copIteratorTaskSender) sendToTaskCh(t *copTask) (exit bool) { } func (worker *copIteratorWorker) sendToRespCh(resp *copResponse, respCh chan<- *copResponse) (exit bool) { + if worker.memTracker != nil { + worker.memTracker.Consume(int64(resp.MemSize())) + } select { case respCh <- resp: case <-worker.finishCh: From 1c3623210a73891b0434789280e2df8829cf783b Mon Sep 17 00:00:00 2001 From: qw4990 Date: Fri, 19 Apr 2019 10:58:42 +0800 Subject: [PATCH 2/3] fix --- executor/chunk_size_control_test.go | 235 ---------------------------- 1 file changed, 235 deletions(-) delete mode 100644 executor/chunk_size_control_test.go diff --git a/executor/chunk_size_control_test.go b/executor/chunk_size_control_test.go deleted file mode 100644 index d143f08bd831c..0000000000000 --- a/executor/chunk_size_control_test.go +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright 2019 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package executor_test - -import ( - "context" - "fmt" - "strings" - "sync" - "time" - - . 
"github.com/pingcap/check" - "github.com/pingcap/parser/model" - "github.com/pingcap/tidb/domain" - "github.com/pingcap/tidb/kv" - "github.com/pingcap/tidb/session" - "github.com/pingcap/tidb/store/mockstore" - "github.com/pingcap/tidb/store/mockstore/mocktikv" - "github.com/pingcap/tidb/store/tikv" - "github.com/pingcap/tidb/store/tikv/tikvrpc" - "github.com/pingcap/tidb/tablecodec" - "github.com/pingcap/tidb/types" - "github.com/pingcap/tidb/util/codec" - "github.com/pingcap/tidb/util/testkit" -) - -var ( - _ = Suite(&testChunkSizeControlSuite{}) -) - -type testSlowClient struct { - sync.RWMutex - tikv.Client - regionDelay map[uint64]time.Duration -} - -func (c *testSlowClient) SendRequest(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (*tikvrpc.Response, error) { - regionID := req.RegionId - delay := c.GetDelay(regionID) - if req.Type == tikvrpc.CmdCop && delay > 0 { - time.Sleep(delay) - } - return c.Client.SendRequest(ctx, addr, req, timeout) -} - -func (c *testSlowClient) SetDelay(regionID uint64, dur time.Duration) { - c.Lock() - defer c.Unlock() - c.regionDelay[regionID] = dur -} - -func (c *testSlowClient) GetDelay(regionID uint64) time.Duration { - c.RLock() - defer c.RUnlock() - return c.regionDelay[regionID] -} - -// manipulateCluster splits this cluster's region by splitKeys and returns regionIDs after split -func manipulateCluster(cluster *mocktikv.Cluster, splitKeys [][]byte) []uint64 { - regions := cluster.GetAllRegions() - if len(regions) != 1 { - panic("this cluster has already split") - } - - allRegionIDs := []uint64{regions[0].Meta.Id} - for i, key := range splitKeys { - newRegionID, newPeerID := cluster.AllocID(), cluster.AllocID() - cluster.Split(allRegionIDs[i], newRegionID, key, []uint64{newPeerID}, newPeerID) - allRegionIDs = append(allRegionIDs, newRegionID) - } - return allRegionIDs -} - -func generateTableSplitKeyForInt(tid int64, splitNum []int) [][]byte { - results := make([][]byte, 0, len(splitNum)) - for _, num := range splitNum { - results = append(results, tablecodec.EncodeRowKey(tid, codec.EncodeInt(nil, int64(num)))) - } - return results -} - -func generateIndexSplitKeyForInt(tid, idx int64, splitNum []int) [][]byte { - results := make([][]byte, 0, len(splitNum)) - for _, num := range splitNum { - d := new(types.Datum) - d.SetInt64(int64(num)) - b, err := codec.EncodeKey(nil, nil, *d) - if err != nil { - panic(err) - } - results = append(results, tablecodec.EncodeIndexSeekKey(tid, idx, b)) - } - return results -} - -type testChunkSizeControlKit struct { - store kv.Storage - dom *domain.Domain - tk *testkit.TestKit - client *testSlowClient - cluster *mocktikv.Cluster -} - -type testChunkSizeControlSuite struct { - m map[string]*testChunkSizeControlKit -} - -func (s *testChunkSizeControlSuite) SetUpSuite(c *C) { - tableSQLs := map[string]string{} - tableSQLs["Limit&TableScan"] = "create table t (a int, primary key (a))" - tableSQLs["Limit&IndexScan"] = "create table t (a int, index idx_a(a))" - - s.m = make(map[string]*testChunkSizeControlKit) - for name, sql := range tableSQLs { - // BootstrapSession is not thread-safe, so we have to prepare all resources in SetUp. 
- kit := new(testChunkSizeControlKit) - s.m[name] = kit - kit.client = &testSlowClient{regionDelay: make(map[uint64]time.Duration)} - kit.cluster = mocktikv.NewCluster() - mocktikv.BootstrapWithSingleStore(kit.cluster) - - var err error - kit.store, err = mockstore.NewMockTikvStore( - mockstore.WithCluster(kit.cluster), - mockstore.WithHijackClient(func(c tikv.Client) tikv.Client { - kit.client.Client = c - return kit.client - }), - ) - c.Assert(err, IsNil) - - // init domain - kit.dom, err = session.BootstrapSession(kit.store) - c.Assert(err, IsNil) - - // create the test table - kit.tk = testkit.NewTestKitWithInit(c, kit.store) - kit.tk.MustExec(sql) - } -} - -func (s *testChunkSizeControlSuite) getKit(name string) ( - kv.Storage, *domain.Domain, *testkit.TestKit, *testSlowClient, *mocktikv.Cluster) { - x := s.m[name] - return x.store, x.dom, x.tk, x.client, x.cluster -} - -func (s *testChunkSizeControlSuite) TestLimitAndTableScan(c *C) { - c.Skip("not stable because coprocessor may result in goroutine leak") - _, dom, tk, client, cluster := s.getKit("Limit&TableScan") - defer client.Close() - tbl, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - c.Assert(err, IsNil) - tid := tbl.Meta().ID - - // construct two regions split by 100 - splitKeys := generateTableSplitKeyForInt(tid, []int{100}) - regionIDs := manipulateCluster(cluster, splitKeys) - - noDelayThreshold := time.Millisecond * 100 - delayDuration := time.Second - delayThreshold := delayDuration * 9 / 10 - tk.MustExec("insert into t values (1)") // insert one record into region1, and set a delay duration - client.SetDelay(regionIDs[0], delayDuration) - - results := tk.MustQuery("explain analyze select * from t where t.a > 0 and t.a < 200 limit 1") - cost := s.parseTimeCost(c, results.Rows()[0]) - c.Assert(cost, Not(Less), delayThreshold) // have to wait for region1 - - tk.MustExec("insert into t values (101)") // insert one record into region2 - results = tk.MustQuery("explain analyze select * from t where t.a > 0 and t.a < 200 limit 1") - cost = s.parseTimeCost(c, results.Rows()[0]) - c.Assert(cost, Less, noDelayThreshold) // region2 return quickly - - results = tk.MustQuery("explain analyze select * from t where t.a > 0 and t.a < 200 limit 2") - cost = s.parseTimeCost(c, results.Rows()[0]) - c.Assert(cost, Not(Less), delayThreshold) // have to wait -} - -func (s *testChunkSizeControlSuite) TestLimitAndIndexScan(c *C) { - c.Skip("not stable because coprocessor may result in goroutine leak") - _, dom, tk, client, cluster := s.getKit("Limit&IndexScan") - defer client.Close() - tbl, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - c.Assert(err, IsNil) - tid := tbl.Meta().ID - idx := tbl.Meta().Indices[0].ID - - // construct two regions split by 100 - splitKeys := generateIndexSplitKeyForInt(tid, idx, []int{100}) - regionIDs := manipulateCluster(cluster, splitKeys) - - noDelayThreshold := time.Millisecond * 100 - delayDuration := time.Second - delayThreshold := delayDuration * 9 / 10 - tk.MustExec("insert into t values (1)") // insert one record into region1, and set a delay duration - client.SetDelay(regionIDs[0], delayDuration) - - results := tk.MustQuery("explain analyze select * from t where t.a > 0 and t.a < 200 limit 1") - cost := s.parseTimeCost(c, results.Rows()[0]) - c.Assert(cost, Not(Less), delayThreshold) // have to wait for region1 - - tk.MustExec("insert into t values (101)") // insert one record into region2 - results = tk.MustQuery("explain analyze 
select * from t where t.a > 0 and t.a < 200 limit 1") - cost = s.parseTimeCost(c, results.Rows()[0]) - c.Assert(cost, Less, noDelayThreshold) // region2 return quickly - - results = tk.MustQuery("explain analyze select * from t where t.a > 0 and t.a < 200 limit 2") - cost = s.parseTimeCost(c, results.Rows()[0]) - c.Assert(cost, Not(Less), delayThreshold) // have to wait -} - -func (s *testChunkSizeControlSuite) parseTimeCost(c *C, line []interface{}) time.Duration { - lineStr := fmt.Sprintf("%v", line) - idx := strings.Index(lineStr, "time:") - c.Assert(idx, Not(Equals), -1) - lineStr = lineStr[idx+len("time:"):] - idx = strings.Index(lineStr, ",") - c.Assert(idx, Not(Equals), -1) - timeStr := lineStr[:idx] - d, err := time.ParseDuration(timeStr) - c.Assert(err, IsNil) - return d -} From 284eba5bb6ed18cb4c8f832e3004cdeba64b8bd4 Mon Sep 17 00:00:00 2001 From: qw4990 Date: Fri, 19 Apr 2019 11:23:51 +0800 Subject: [PATCH 3/3] refmt --- executor/executor_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/executor/executor_test.go b/executor/executor_test.go index e47b7e01e585b..c6e40801d2fa6 100644 --- a/executor/executor_test.go +++ b/executor/executor_test.go @@ -2838,7 +2838,7 @@ func (s *testSuite) TestUnsignedPk(c *C) { tk.MustExec("drop table if exists t") tk.MustExec("create table t(id bigint unsigned primary key)") - var num1, num2 uint64 = math.MaxInt64+1, math.MaxInt64+2 + var num1, num2 uint64 = math.MaxInt64 + 1, math.MaxInt64 + 2 tk.MustExec(fmt.Sprintf("insert into t values(%v), (%v), (1), (2)", num1, num2)) num1Str := strconv.FormatUint(num1, 10) num2Str := strconv.FormatUint(num2, 10)
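
A note on the accounting protocol these three patches put in place: memory is charged and released symmetrically around each response queue. The coprocessor worker calls MemTracker.Consume(resp.MemSize()) right before a response enters respCh, copIterator releases the same amount when the response is taken back out, selectResult charges it again while the result sits in its own channel, and finally swaps those raw bytes for the size of the unmarshalled tipb.SelectResponse. The tracker therefore only approximates what is currently buffered, and exceeding MemQuotaDistSQL surfaces as the "memory exceeds quota" log line that the oomCapturer test hooks. The sketch below is a minimal, self-contained illustration of that charge-on-send / release-on-receive pattern; toyTracker and resp are stand-ins invented for the example, not TiDB's util/memory.Tracker or copResponse.

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// toyTracker is a stand-in for a memory tracker: it keeps a running byte
// count and prints a warning once the total exceeds its quota, roughly the
// way the DistSQL tracker ends up logging "memory exceeds quota".
type toyTracker struct {
	label    string
	quota    int64
	consumed int64
}

// Consume adds bytes (which may be negative, to release memory) to the
// running total and checks it against the quota.
func (t *toyTracker) Consume(bytes int64) {
	if cur := atomic.AddInt64(&t.consumed, bytes); cur > t.quota {
		fmt.Printf("%s: memory exceeds quota (%d > %d bytes)\n", t.label, cur, t.quota)
	}
}

// resp mimics a coprocessor response; MemSize reports how many bytes it holds.
type resp struct{ data []byte }

func (r *resp) MemSize() int64 { return int64(cap(r.data)) }

func main() {
	tracker := &toyTracker{label: "TableReaderDistSQLTracker", quota: 1 << 10}
	respCh := make(chan *resp, 4)

	// Producer (analogous to copIteratorWorker.sendToRespCh): charge the
	// tracker before the response starts waiting in the channel.
	go func() {
		for i := 0; i < 8; i++ {
			r := &resp{data: make([]byte, 512)}
			tracker.Consume(r.MemSize())
			respCh <- r
		}
		close(respCh)
	}()

	// Consumer (analogous to copIterator.recvFromRespCh and selectResult):
	// release the bytes as soon as the response leaves the channel, so the
	// tracker only ever reflects what is still buffered.
	for r := range respCh {
		tracker.Consume(-r.MemSize())
		// ...decode and use the response here...
	}
}
```

This also explains the design choice called out in distsql.go: streamResult is an unbuffered pipeline, so there is nothing queued to account for and it is deliberately left untracked; only the buffered coprocessor/selectResult path needs the Consume/release bookkeeping.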